Float16.cuh
/**
 * Copyright (c) 2015-present, Facebook, Inc.
 * All rights reserved.
 *
 * This source code is licensed under the CC-by-NC license found in the
 * LICENSE file in the root directory of this source tree.
 */

// Copyright 2004-present Facebook. All Rights Reserved.

#pragma once

#include <cuda.h>
#include "../GpuResources.h"
#include "DeviceTensor.cuh"

// For float16, we use the half datatype, expecting it to be a struct
// as in CUDA 7.5.
#if CUDA_VERSION >= 7050
#define FAISS_USE_FLOAT16 1

// Some compute capabilities have full float16 ALUs.
#if __CUDA_ARCH__ >= 530
#define FAISS_USE_FULL_FLOAT16 1
#endif // __CUDA_ARCH__ >= 530

#endif // CUDA_VERSION

#ifdef FAISS_USE_FLOAT16
#include <cuda_fp16.h>
#endif

namespace faiss { namespace gpu {

#ifdef FAISS_USE_FLOAT16

// 64 bits (8 bytes) containing 4 half (float16) values
struct Half4 {
  half2 a;
  half2 b;
};

inline __device__ float4 half4ToFloat4(Half4 v) {
  float2 a = __half22float2(v.a);
  float2 b = __half22float2(v.b);

  float4 out;
  out.x = a.x;
  out.y = a.y;
  out.z = b.x;
  out.w = b.y;

  return out;
}

inline __device__ Half4 float4ToHalf4(float4 v) {
  float2 a;
  a.x = v.x;
  a.y = v.y;

  float2 b;
  b.x = v.z;
  b.y = v.w;

  Half4 out;
  out.a = __float22half2_rn(a);
  out.b = __float22half2_rn(b);

  return out;
}
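
// Example (a sketch, not part of the Faiss API): a hypothetical kernel
// using float4ToHalf4 for a vectorized float32 -> float16 conversion,
// four values per thread. `in`, `out` and `numQuads` are placeholder
// names for this example.
//
//   __global__ void convertQuads(const float4* in, Half4* out, int numQuads) {
//     int i = blockIdx.x * blockDim.x + threadIdx.x;
//     if (i < numQuads) {
//       out[i] = float4ToHalf4(in[i]);
//     }
//   }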

// 128 bits (16 bytes) containing 8 half (float16) values
struct Half8 {
  Half4 a;
  Half4 b;
};

/// Returns true if the given device supports native float16 math
bool getDeviceSupportsFloat16Math(int device);
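
// Example (sketch): gating on native float16 arithmetic support at
// runtime; `device` is assumed to be the CUDA device ordinal in use:
//
//   if (getDeviceSupportsFloat16Math(device)) {
//     // can run arithmetic directly on half data (__CUDA_ARCH__ >= 530)
//   } else {
//     // store as float16, but convert to float32 for arithmetic
//   }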

/// Copies `in` to `out` while performing a float32 -> float16 conversion
void runConvertToFloat16(half* out,
                         const float* in,
                         size_t num,
                         cudaStream_t stream);

/// Copies `in` to `out` while performing a float16 -> float32 conversion
void runConvertToFloat32(float* out,
                         const half* in,
                         size_t num,
                         cudaStream_t stream);
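
// Example (sketch): round-tripping a raw device buffer on a stream.
// `devFloat`, `devHalf`, `n` and `stream` are placeholder names, and
// error checking is omitted:
//
//   half* devHalf = nullptr;
//   cudaMalloc(&devHalf, n * sizeof(half));
//   runConvertToFloat16(devHalf, devFloat, n, stream);
//   // ... use devHalf ...
//   runConvertToFloat32(devFloat, devHalf, n, stream);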

/// Copies `in` to `out` while performing a float32 -> float16
/// conversion; both tensors must contain the same number of elements
template <int Dim>
void toHalf(cudaStream_t stream,
            Tensor<float, Dim, true>& in,
            Tensor<half, Dim, true>& out) {
  FAISS_ASSERT(in.numElements() == out.numElements());

  // The memory is contiguous (the `true`), so apply a pointwise
  // kernel to convert
  runConvertToFloat16(out.data(), in.data(), in.numElements(), stream);
}

/// Allocates a float16 tensor of the same size as `in` (from the
/// current device's memory manager, if `resources` is non-null) and
/// fills it with the converted contents of `in`
template <int Dim>
DeviceTensor<half, Dim, true> toHalf(GpuResources* resources,
                                     cudaStream_t stream,
                                     Tensor<float, Dim, true>& in) {
  DeviceTensor<half, Dim, true> out;
  if (resources) {
    out = std::move(DeviceTensor<half, Dim, true>(
                      resources->getMemoryManagerCurrentDevice(),
                      in.sizes(),
                      stream));
  } else {
    out = std::move(DeviceTensor<half, Dim, true>(in.sizes()));
  }

  toHalf<Dim>(stream, in, out);
  return out;
}
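
// Example (sketch): converting a 2-d float32 tensor to float16 with
// the allocating overload above. `res` (a GpuResources*), `stream` and
// `floatTensor` (a Tensor<float, 2, true>) are assumed to exist:
//
//   DeviceTensor<half, 2, true> halfTensor =
//     toHalf<2>(res, stream, floatTensor);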

/// Copies `in` to `out` while performing a float16 -> float32
/// conversion; both tensors must contain the same number of elements
template <int Dim>
void fromHalf(cudaStream_t stream,
              Tensor<half, Dim, true>& in,
              Tensor<float, Dim, true>& out) {
  FAISS_ASSERT(in.numElements() == out.numElements());

  // The memory is contiguous (the `true`), so apply a pointwise
  // kernel to convert
  runConvertToFloat32(out.data(), in.data(), in.numElements(), stream);
}

/// Allocates a float32 tensor of the same size as `in` (from the
/// current device's memory manager, if `resources` is non-null) and
/// fills it with the converted contents of `in`
template <int Dim>
DeviceTensor<float, Dim, true> fromHalf(GpuResources* resources,
                                        cudaStream_t stream,
                                        Tensor<half, Dim, true>& in) {
  DeviceTensor<float, Dim, true> out;
  if (resources) {
    out = std::move(DeviceTensor<float, Dim, true>(
                      resources->getMemoryManagerCurrentDevice(),
                      in.sizes(),
                      stream));
  } else {
    out = std::move(DeviceTensor<float, Dim, true>(in.sizes()));
  }

  fromHalf<Dim>(stream, in, out);
  return out;
}

/// Performs a float32 -> float16 conversion on the host (CPU)
half hostFloat2Half(float v);
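
// Example (sketch): host-side conversion of a scalar, e.g. to prepare
// a float16 constant before copying it to the device:
//
//   half h = hostFloat2Half(1.5f);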

#endif // FAISS_USE_FLOAT16

} } // namespace