Float16.cuh

/**
 * Copyright (c) 2015-present, Facebook, Inc.
 * All rights reserved.
 *
 * This source code is licensed under the CC-by-NC license found in the
 * LICENSE file in the root directory of this source tree.
 */

// Copyright 2004-present Facebook. All Rights Reserved.

#pragma once

#include <cuda.h>
#include "../GpuResources.h"
#include "DeviceTensor.cuh"

// For float16, we use the half datatype, expecting it to be a struct
// as in CUDA 7.5.
#if CUDA_VERSION >= 7050
#define FAISS_USE_FLOAT16 1

// Some compute capabilities have full float16 ALUs.
#if __CUDA_ARCH__ >= 530
#define FAISS_USE_FULL_FLOAT16 1
#endif // __CUDA_ARCH__ >= 530

#endif // CUDA_VERSION

#ifdef FAISS_USE_FLOAT16
#include <cuda_fp16.h>
#endif
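
// A minimal sketch of how these two guards are typically used together in
// device code: FAISS_USE_FLOAT16 gates whether half code compiles at all,
// while FAISS_USE_FULL_FLOAT16 gates native half arithmetic (sm_53+). The
// fallback pattern below is illustrative only, not part of this header:
//
//   __device__ half addHalf(half a, half b) {
//   #ifdef FAISS_USE_FULL_FLOAT16
//     return __hadd(a, b);   // native float16 ALU
//   #else
//     // widen to float32, add, and narrow back
//     return __float2half(__half2float(a) + __half2float(b));
//   #endif
//   }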

namespace faiss { namespace gpu {

#ifdef FAISS_USE_FLOAT16

// 64 bits containing 4 half (float16) values
struct Half4 {
  half2 a;
  half2 b;
};

inline __device__ float4 half4ToFloat4(Half4 v) {
  float2 a = __half22float2(v.a);
  float2 b = __half22float2(v.b);

  float4 out;
  out.x = a.x;
  out.y = a.y;
  out.z = b.x;
  out.w = b.y;

  return out;
}

inline __device__ Half4 float4ToHalf4(float4 v) {
  float2 a;
  a.x = v.x;
  a.y = v.y;

  float2 b;
  b.x = v.z;
  b.y = v.w;

  Half4 out;
  out.a = __float22half2_rn(a);
  out.b = __float22half2_rn(b);

  return out;
}
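
// Illustrative use of Half4 (not part of this header): a kernel can load
// four float16 values in a single 64-bit transaction and widen them to
// float32 for accumulation. The kernel name below is hypothetical.
//
//   __global__ void sumHalf4(const Half4* in, float* out, int numQuads) {
//     int i = blockIdx.x * blockDim.x + threadIdx.x;
//     if (i < numQuads) {
//       float4 v = half4ToFloat4(in[i]);
//       out[i] = v.x + v.y + v.z + v.w;
//     }
//   }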

// 128 bits containing 8 half (float16) values
struct Half8 {
  Half4 a;
  Half4 b;
};

/// Returns true if the given device supports native float16 math
bool getDeviceSupportsFloat16Math(int device);

/// Copies `in` to `out` while performing a float32 -> float16 conversion
void runConvertToFloat16(half* out,
                         const float* in,
                         size_t num,
                         cudaStream_t stream);

/// Copies `in` to `out` while performing a float16 -> float32
/// conversion
void runConvertToFloat32(float* out,
                         const half* in,
                         size_t num,
                         cudaStream_t stream);
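
// Usage sketch for the raw-pointer converters (assumes `d_halfs` and
// `d_floats` are device allocations of at least `num` elements; both calls
// are asynchronous with respect to the host):
//
//   runConvertToFloat16(d_halfs, d_floats, num, stream);  // fp32 -> fp16
//   runConvertToFloat32(d_floats, d_halfs, num, stream);  // fp16 -> fp32
//   cudaStreamSynchronize(stream); // wait before reading results on host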

template <int Dim>
void toHalf(cudaStream_t stream,
            Tensor<float, Dim, true>& in,
            Tensor<half, Dim, true>& out) {
  FAISS_ASSERT(in.numElements() == out.numElements());

  // The memory is contiguous (the `true`), so apply a pointwise
  // kernel to convert
  runConvertToFloat16(out.data(), in.data(), in.numElements(), stream);
}

template <int Dim>
DeviceTensor<half, Dim, true> toHalf(GpuResources* resources,
                                     cudaStream_t stream,
                                     Tensor<float, Dim, true>& in) {
  DeviceTensor<half, Dim, true> out;
  if (resources) {
    out = std::move(DeviceTensor<half, Dim, true>(
                      resources->getMemoryManagerCurrentDevice(),
                      in.sizes(),
                      stream));
  } else {
    out = std::move(DeviceTensor<half, Dim, true>(in.sizes()));
  }

  toHalf<Dim>(stream, in, out);
  return out;
}
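
// Illustrative call (tensor names hypothetical): converting an existing
// float32 tensor into a temporary float16 tensor drawn from the resource
// object's memory manager for the current device:
//
//   DeviceTensor<half, 2, true> queriesHalf =
//     toHalf<2>(resources, stream, queriesFloat);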

template <int Dim>
void fromHalf(cudaStream_t stream,
              Tensor<half, Dim, true>& in,
              Tensor<float, Dim, true>& out) {
  FAISS_ASSERT(in.numElements() == out.numElements());

  // The memory is contiguous (the `true`), so apply a pointwise
  // kernel to convert
  runConvertToFloat32(out.data(), in.data(), in.numElements(), stream);
}

template <int Dim>
DeviceTensor<float, Dim, true> fromHalf(GpuResources* resources,
                                        cudaStream_t stream,
                                        Tensor<half, Dim, true>& in) {
  DeviceTensor<float, Dim, true> out;
  if (resources) {
    out = std::move(DeviceTensor<float, Dim, true>(
                      resources->getMemoryManagerCurrentDevice(),
                      in.sizes(),
                      stream));
  } else {
    out = std::move(DeviceTensor<float, Dim, true>(in.sizes()));
  }

  fromHalf<Dim>(stream, in, out);
  return out;
}
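
// The inverse direction mirrors toHalf; e.g. widening float16 distances
// back to float32 before handing them to the caller (illustrative, names
// hypothetical):
//
//   DeviceTensor<float, 2, true> distances =
//     fromHalf<2>(resources, stream, distancesHalf);
//
// Note that the round trip toHalf -> fromHalf is lossy: float16 has a
// 10-bit mantissa, so float32 values are rounded to the nearest
// representable half (the _rn suffix in __float22half2_rn denotes
// round-to-nearest-even).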

half hostFloat2Half(float v);
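
// Host-side scalar conversion, e.g. for preparing a float16 constant to
// pass to a kernel (illustrative):
//
//   half kOne = hostFloat2Half(1.0f);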

#endif // FAISS_USE_FLOAT16

} } // namespace