Faiss: gpu/utils/LoadStoreOperators.cuh
/**
 * Copyright (c) 2015-present, Facebook, Inc.
 * All rights reserved.
 *
 * This source code is licensed under the BSD+Patents license found in the
 * LICENSE file in the root directory of this source tree.
 */

#pragma once

#include "Float16.cuh"

#ifndef __HALF2_TO_UI
// cuda_fp16.hpp doesn't export this
#define __HALF2_TO_UI(var) *(reinterpret_cast<unsigned int *>(&(var)))
#endif

//
// Templated wrappers to express load/store for different scalar and vector
// types, so kernels can have the same written form but can operate
// over half and float, and on vector types transparently
//

namespace faiss { namespace gpu {

template <typename T>
struct LoadStore {
  static inline __device__ T load(void* p) {
    return *((T*) p);
  }

  static inline __device__ void store(void* p, const T& v) {
    *((T*) p) = v;
  }
};

#ifdef FAISS_USE_FLOAT16

template <>
struct LoadStore<Half4> {
  static inline __device__ Half4 load(void* p) {
    Half4 out;
#if CUDA_VERSION >= 9000
    asm("ld.global.v2.u32 {%0, %1}, [%2];" :
        "=r"(__HALF2_TO_UI(out.a)), "=r"(__HALF2_TO_UI(out.b)) : "l"(p));
#else
    asm("ld.global.v2.u32 {%0, %1}, [%2];" :
        "=r"(out.a.x), "=r"(out.b.x) : "l"(p));
#endif
    return out;
  }

  static inline __device__ void store(void* p, Half4& v) {
#if CUDA_VERSION >= 9000
    asm("st.v2.u32 [%0], {%1, %2};" : : "l"(p),
        "r"(__HALF2_TO_UI(v.a)), "r"(__HALF2_TO_UI(v.b)));
#else
    asm("st.v2.u32 [%0], {%1, %2};" : : "l"(p), "r"(v.a.x), "r"(v.b.x));
#endif
  }
};

template <>
struct LoadStore<Half8> {
  static inline __device__ Half8 load(void* p) {
    Half8 out;
#if CUDA_VERSION >= 9000
    asm("ld.global.v4.u32 {%0, %1, %2, %3}, [%4];" :
        "=r"(__HALF2_TO_UI(out.a.a)), "=r"(__HALF2_TO_UI(out.a.b)),
        "=r"(__HALF2_TO_UI(out.b.a)), "=r"(__HALF2_TO_UI(out.b.b)) : "l"(p));
#else
    asm("ld.global.v4.u32 {%0, %1, %2, %3}, [%4];" :
        "=r"(out.a.a.x), "=r"(out.a.b.x),
        "=r"(out.b.a.x), "=r"(out.b.b.x) : "l"(p));
#endif
    return out;
  }

  static inline __device__ void store(void* p, Half8& v) {
#if CUDA_VERSION >= 9000
    asm("st.v4.u32 [%0], {%1, %2, %3, %4};"
        : : "l"(p), "r"(__HALF2_TO_UI(v.a.a)), "r"(__HALF2_TO_UI(v.a.b)),
        "r"(__HALF2_TO_UI(v.b.a)), "r"(__HALF2_TO_UI(v.b.b)));
#else
    asm("st.v4.u32 [%0], {%1, %2, %3, %4};"
        : : "l"(p), "r"(v.a.a.x), "r"(v.a.b.x), "r"(v.b.a.x), "r"(v.b.b.x));
#endif
  }
};

#endif // FAISS_USE_FLOAT16

} } // namespace
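
For context, the sketch below is not part of the header; it shows one way these wrappers are typically used. A kernel written once against LoadStore<T> compiles to a plain dereference for ordinary types and to the inline-PTX vector loads/stores above for Half4/Half8. The kernel name copyVectorized and its launch configuration are illustrative assumptions, not Faiss API.

// Minimal usage sketch, assuming LoadStoreOperators.cuh is on the include path.
#include "LoadStoreOperators.cuh"

namespace faiss { namespace gpu {

template <typename T>
__global__ void copyVectorized(const T* in, T* out, int numElements) {
  int i = blockIdx.x * blockDim.x + threadIdx.x;
  if (i < numElements) {
    // load()/store() hide whether T moves via a simple dereference or via the
    // specialized vector instructions (Half4/Half8 under FAISS_USE_FLOAT16)
    T v = LoadStore<T>::load((void*) &in[i]);
    LoadStore<T>::store((void*) &out[i], v);
  }
}

} } // namespace

The point of the Half4/Half8 specializations is that each access is issued as a single 64-bit (v2.u32) or 128-bit (v4.u32) memory transaction per thread, rather than relying on the compiler to vectorize a struct of half2 members.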