Faiss
Main Page
Namespaces
Classes
Files
File List
All
Classes
Namespaces
Functions
Variables
Typedefs
Enumerations
Enumerator
Friends
gpu
utils
LoadStoreOperators.cuh
1
/**
2
* Copyright (c) 2015-present, Facebook, Inc.
3
* All rights reserved.
4
*
5
* This source code is licensed under the BSD+Patents license found in the
6
* LICENSE file in the root directory of this source tree.
7
*/
8
9
// Copyright 2004-present Facebook. All Rights Reserved.
10
11
#pragma once
12
13
#include "Float16.cuh"
14
15
#ifndef __HALF2_TO_UI
16
// cuda_fp16.hpp doesn't export this macro, so define it here when it is missing
17
#define __HALF2_TO_UI(var) *(reinterpret_cast<unsigned int *>(&(var)))
18
#endif
19
20
21
//
22
// Templated wrappers to express load/store for different scalar and vector
23
// types, so kernels can have the same written form but can operate
24
// over half and float, and on vector types transparently
25
//
26
27
namespace
faiss {
namespace
gpu {
28
29
/// Generic load/store wrapper: treats the untyped address `p` as a pointer
/// to `T` and performs an ordinary C++ read or write through it.
/// Specializations may replace this with wider, type-specific accesses.
template <typename T>
struct LoadStore {
  /// Returns a copy of the `T` located at `p`.
  /// `p` is dereferenced as `T*`, so it has to refer to a valid `T`.
  static inline __device__ T load(void* p) {
    T* typed = static_cast<T*>(p);
    return *typed;
  }

  /// Assigns `v` to the `T` located at `p`.
  /// `p` is dereferenced as `T*`, so it has to refer to writable storage
  /// for a `T`.
  static inline __device__ void store(void* p, const T& v) {
    T* typed = static_cast<T*>(p);
    *typed = v;
  }
};
39
40
#ifdef FAISS_USE_FLOAT16
41
42
/// LoadStore specialization for Half4: moves both packed members (`a` and
/// `b`) with a single two-word (v2.u32) PTX vector instruction instead of
/// separate accesses.
///
/// The inline PTX accesses memory through the pointer operand, which the
/// compiler cannot see from the operand list. Each statement is therefore
/// `asm volatile` with a "memory" clobber so that it is neither removed,
/// merged with an identical statement, nor reordered across other memory
/// accesses.
template <>
struct LoadStore<Half4> {
  /// Reads one Half4 from `p`.
  /// The instruction is `ld.global`, so `p` has to be a global-memory
  /// address; PTX vector accesses also require alignment to the full access
  /// size (8 bytes here).
  static inline __device__ Half4 load(void* p) {
    Half4 out;
#if CUDA_VERSION >= 9000
    // From CUDA 9 on, the 32-bit payload of each member is reached by
    // reinterpreting it through __HALF2_TO_UI.
    asm volatile("ld.global.v2.u32 {%0, %1}, [%2];"
                 : "=r"(__HALF2_TO_UI(out.a)), "=r"(__HALF2_TO_UI(out.b))
                 : "l"(p)
                 : "memory");
#else
    // Older toolkits: the 32-bit payload is the `.x` field of each member.
    asm volatile("ld.global.v2.u32 {%0, %1}, [%2];"
                 : "=r"(out.a.x), "=r"(out.b.x)
                 : "l"(p)
                 : "memory");
#endif
    return out;
  }

  /// Writes `v` to `p` with one `st.v2.u32` (no state-space qualifier on the
  /// store). `v` is only read; it is taken by non-const reference because
  /// __HALF2_TO_UI takes the address of its argument as `unsigned int*`.
  static inline __device__ void store(void* p, Half4& v) {
#if CUDA_VERSION >= 9000
    asm volatile("st.v2.u32 [%0], {%1, %2};"
                 :
                 : "l"(p),
                   "r"(__HALF2_TO_UI(v.a)), "r"(__HALF2_TO_UI(v.b))
                 : "memory");
#else
    asm volatile("st.v2.u32 [%0], {%1, %2};"
                 :
                 : "l"(p), "r"(v.a.x), "r"(v.b.x)
                 : "memory");
#endif
  }
};
65
66
/// LoadStore specialization for Half8: moves all four packed members
/// (`a.a`, `a.b`, `b.a`, `b.b`) with a single four-word (v4.u32) PTX vector
/// instruction instead of separate accesses.
///
/// The inline PTX accesses memory through the pointer operand, which the
/// compiler cannot see from the operand list. Each statement is therefore
/// `asm volatile` with a "memory" clobber so that it is neither removed,
/// merged with an identical statement, nor reordered across other memory
/// accesses.
template <>
struct LoadStore<Half8> {
  /// Reads one Half8 from `p`.
  /// The instruction is `ld.global`, so `p` has to be a global-memory
  /// address; PTX vector accesses also require alignment to the full access
  /// size (16 bytes here).
  static inline __device__ Half8 load(void* p) {
    Half8 out;
#if CUDA_VERSION >= 9000
    // From CUDA 9 on, the 32-bit payload of each member is reached by
    // reinterpreting it through __HALF2_TO_UI.
    asm volatile("ld.global.v4.u32 {%0, %1, %2, %3}, [%4];"
                 : "=r"(__HALF2_TO_UI(out.a.a)), "=r"(__HALF2_TO_UI(out.a.b)),
                   "=r"(__HALF2_TO_UI(out.b.a)), "=r"(__HALF2_TO_UI(out.b.b))
                 : "l"(p)
                 : "memory");
#else
    // Older toolkits: the 32-bit payload is the `.x` field of each member.
    asm volatile("ld.global.v4.u32 {%0, %1, %2, %3}, [%4];"
                 : "=r"(out.a.a.x), "=r"(out.a.b.x),
                   "=r"(out.b.a.x), "=r"(out.b.b.x)
                 : "l"(p)
                 : "memory");
#endif
    return out;
  }

  /// Writes `v` to `p` with one `st.v4.u32` (no state-space qualifier on the
  /// store). `v` is only read; it is taken by non-const reference because
  /// __HALF2_TO_UI takes the address of its argument as `unsigned int*`.
  static inline __device__ void store(void* p, Half8& v) {
#if CUDA_VERSION >= 9000
    asm volatile("st.v4.u32 [%0], {%1, %2, %3, %4};"
                 :
                 : "l"(p),
                   "r"(__HALF2_TO_UI(v.a.a)), "r"(__HALF2_TO_UI(v.a.b)),
                   "r"(__HALF2_TO_UI(v.b.a)), "r"(__HALF2_TO_UI(v.b.b))
                 : "memory");
#else
    asm volatile("st.v4.u32 [%0], {%1, %2, %3, %4};"
                 :
                 : "l"(p),
                   "r"(v.a.a.x), "r"(v.a.b.x), "r"(v.b.a.x), "r"(v.b.b.x)
                 : "memory");
#endif
  }
};
93
94
#endif // FAISS_USE_FLOAT16
95
96
} }
// namespace
faiss::gpu::LoadStore
Definition:
LoadStoreOperators.cuh:30
Generated by
1.8.5