mirror of
https://github.com/facebookresearch/faiss.git
synced 2025-06-03 21:54:02 +08:00
various bugfixes from github issues kmean with some frozen centroids GPU better tiling for large flat datasets default AVX for vector ops
2123 lines
285 KiB
HTML
2123 lines
285 KiB
HTML
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
|
|
<html xmlns="http://www.w3.org/1999/xhtml">
|
|
<head>
|
|
<meta http-equiv="Content-Type" content="text/xhtml;charset=UTF-8"/>
|
|
<meta http-equiv="X-UA-Compatible" content="IE=9"/>
|
|
<meta name="generator" content="Doxygen 1.8.5"/>
|
|
<title>Faiss: /data/users/matthijs/github_faiss/faiss/utils.cpp Source File</title>
|
|
<link href="tabs.css" rel="stylesheet" type="text/css"/>
|
|
<script type="text/javascript" src="jquery.js"></script>
|
|
<script type="text/javascript" src="dynsections.js"></script>
|
|
<link href="search/search.css" rel="stylesheet" type="text/css"/>
|
|
<script type="text/javascript" src="search/search.js"></script>
|
|
<script type="text/javascript">
|
|
$(document).ready(function() { searchBox.OnSelectItem(0); });
|
|
</script>
|
|
<link href="doxygen.css" rel="stylesheet" type="text/css" />
|
|
</head>
|
|
<body>
|
|
<div id="top"><!-- do not remove this div, it is closed by doxygen! -->
|
|
<div id="titlearea">
|
|
<table cellspacing="0" cellpadding="0">
|
|
<tbody>
|
|
<tr style="height: 56px;">
|
|
<td style="padding-left: 0.5em;">
|
|
<div id="projectname">Faiss
|
|
</div>
|
|
</td>
|
|
</tr>
|
|
</tbody>
|
|
</table>
|
|
</div>
|
|
<!-- end header part -->
|
|
<!-- Generated by Doxygen 1.8.5 -->
|
|
<script type="text/javascript">
|
|
var searchBox = new SearchBox("searchBox", "search",false,'Search');
|
|
</script>
|
|
<div id="navrow1" class="tabs">
|
|
<ul class="tablist">
|
|
<li><a href="index.html"><span>Main Page</span></a></li>
|
|
<li><a href="namespaces.html"><span>Namespaces</span></a></li>
|
|
<li><a href="annotated.html"><span>Classes</span></a></li>
|
|
<li class="current"><a href="files.html"><span>Files</span></a></li>
|
|
<li>
|
|
<div id="MSearchBox" class="MSearchBoxInactive">
|
|
<span class="left">
|
|
<img id="MSearchSelect" src="search/mag_sel.png"
|
|
onmouseover="return searchBox.OnSearchSelectShow()"
|
|
onmouseout="return searchBox.OnSearchSelectHide()"
|
|
alt=""/>
|
|
<input type="text" id="MSearchField" value="Search" accesskey="S"
|
|
onfocus="searchBox.OnSearchFieldFocus(true)"
|
|
onblur="searchBox.OnSearchFieldFocus(false)"
|
|
onkeyup="searchBox.OnSearchFieldChange(event)"/>
|
|
</span><span class="right">
|
|
<a id="MSearchClose" href="javascript:searchBox.CloseResultsWindow()"><img id="MSearchCloseImg" border="0" src="search/close.png" alt=""/></a>
|
|
</span>
|
|
</div>
|
|
</li>
|
|
</ul>
|
|
</div>
|
|
<div id="navrow2" class="tabs2">
|
|
<ul class="tablist">
|
|
<li><a href="files.html"><span>File List</span></a></li>
|
|
</ul>
|
|
</div>
|
|
</div><!-- top -->
|
|
<!-- window showing the filter options -->
|
|
<div id="MSearchSelectWindow"
|
|
onmouseover="return searchBox.OnSearchSelectShow()"
|
|
onmouseout="return searchBox.OnSearchSelectHide()"
|
|
onkeydown="return searchBox.OnSearchSelectKey(event)">
|
|
<a class="SelectItem" href="javascript:void(0)" onclick="searchBox.OnSelectItem(0)"><span class="SelectionMark"> </span>All</a><a class="SelectItem" href="javascript:void(0)" onclick="searchBox.OnSelectItem(1)"><span class="SelectionMark"> </span>Classes</a><a class="SelectItem" href="javascript:void(0)" onclick="searchBox.OnSelectItem(2)"><span class="SelectionMark"> </span>Namespaces</a><a class="SelectItem" href="javascript:void(0)" onclick="searchBox.OnSelectItem(3)"><span class="SelectionMark"> </span>Functions</a><a class="SelectItem" href="javascript:void(0)" onclick="searchBox.OnSelectItem(4)"><span class="SelectionMark"> </span>Variables</a><a class="SelectItem" href="javascript:void(0)" onclick="searchBox.OnSelectItem(5)"><span class="SelectionMark"> </span>Typedefs</a><a class="SelectItem" href="javascript:void(0)" onclick="searchBox.OnSelectItem(6)"><span class="SelectionMark"> </span>Enumerations</a><a class="SelectItem" href="javascript:void(0)" onclick="searchBox.OnSelectItem(7)"><span class="SelectionMark"> </span>Enumerator</a><a class="SelectItem" href="javascript:void(0)" onclick="searchBox.OnSelectItem(8)"><span class="SelectionMark"> </span>Friends</a></div>
|
|
|
|
<!-- iframe showing the search results (closed by default) -->
|
|
<div id="MSearchResultsWindow">
|
|
<iframe src="javascript:void(0)" frameborder="0"
|
|
name="MSearchResults" id="MSearchResults">
|
|
</iframe>
|
|
</div>
|
|
|
|
<div class="header">
|
|
<div class="headertitle">
|
|
<div class="title">/data/users/matthijs/github_faiss/faiss/utils.cpp</div> </div>
|
|
</div><!--header-->
|
|
<div class="contents">
|
|
<div class="fragment"><div class="line"><a name="l00001"></a><span class="lineno"> 1</span> <span class="comment">/**</span></div>
|
|
<div class="line"><a name="l00002"></a><span class="lineno"> 2</span> <span class="comment"> * Copyright (c) 2015-present, Facebook, Inc.</span></div>
|
|
<div class="line"><a name="l00003"></a><span class="lineno"> 3</span> <span class="comment"> * All rights reserved.</span></div>
|
|
<div class="line"><a name="l00004"></a><span class="lineno"> 4</span> <span class="comment"> *</span></div>
|
|
<div class="line"><a name="l00005"></a><span class="lineno"> 5</span> <span class="comment"> * This source code is licensed under the BSD+Patents license found in the</span></div>
|
|
<div class="line"><a name="l00006"></a><span class="lineno"> 6</span> <span class="comment"> * LICENSE file in the root directory of this source tree.</span></div>
|
|
<div class="line"><a name="l00007"></a><span class="lineno"> 7</span> <span class="comment"> */</span></div>
|
|
<div class="line"><a name="l00008"></a><span class="lineno"> 8</span> </div>
|
|
<div class="line"><a name="l00009"></a><span class="lineno"> 9</span> <span class="comment">// Copyright 2004-present Facebook. All Rights Reserved</span></div>
|
|
<div class="line"><a name="l00010"></a><span class="lineno"> 10</span> <span class="comment">// -*- c++ -*-</span></div>
|
|
<div class="line"><a name="l00011"></a><span class="lineno"> 11</span> </div>
|
|
<div class="line"><a name="l00012"></a><span class="lineno"> 12</span> <span class="preprocessor">#include "utils.h"</span></div>
|
|
<div class="line"><a name="l00013"></a><span class="lineno"> 13</span> </div>
|
|
<div class="line"><a name="l00014"></a><span class="lineno"> 14</span> <span class="preprocessor">#include <cstdio></span></div>
|
|
<div class="line"><a name="l00015"></a><span class="lineno"> 15</span> <span class="preprocessor">#include <cassert></span></div>
|
|
<div class="line"><a name="l00016"></a><span class="lineno"> 16</span> <span class="preprocessor">#include <cstring></span></div>
|
|
<div class="line"><a name="l00017"></a><span class="lineno"> 17</span> <span class="preprocessor">#include <cmath></span></div>
|
|
<div class="line"><a name="l00018"></a><span class="lineno"> 18</span> </div>
|
|
<div class="line"><a name="l00019"></a><span class="lineno"> 19</span> <span class="preprocessor">#include <immintrin.h></span></div>
|
|
<div class="line"><a name="l00020"></a><span class="lineno"> 20</span> </div>
|
|
<div class="line"><a name="l00021"></a><span class="lineno"> 21</span> </div>
|
|
<div class="line"><a name="l00022"></a><span class="lineno"> 22</span> <span class="preprocessor">#include <sys/time.h></span></div>
|
|
<div class="line"><a name="l00023"></a><span class="lineno"> 23</span> <span class="preprocessor">#include <sys/types.h></span></div>
|
|
<div class="line"><a name="l00024"></a><span class="lineno"> 24</span> <span class="preprocessor">#include <unistd.h></span></div>
|
|
<div class="line"><a name="l00025"></a><span class="lineno"> 25</span> </div>
|
|
<div class="line"><a name="l00026"></a><span class="lineno"> 26</span> <span class="preprocessor">#include <omp.h></span></div>
|
|
<div class="line"><a name="l00027"></a><span class="lineno"> 27</span> </div>
|
|
<div class="line"><a name="l00028"></a><span class="lineno"> 28</span> </div>
|
|
<div class="line"><a name="l00029"></a><span class="lineno"> 29</span> <span class="preprocessor">#include <algorithm></span></div>
|
|
<div class="line"><a name="l00030"></a><span class="lineno"> 30</span> <span class="preprocessor">#include <vector></span></div>
|
|
<div class="line"><a name="l00031"></a><span class="lineno"> 31</span> </div>
|
|
<div class="line"><a name="l00032"></a><span class="lineno"> 32</span> <span class="preprocessor">#include "AuxIndexStructures.h"</span></div>
|
|
<div class="line"><a name="l00033"></a><span class="lineno"> 33</span> <span class="preprocessor">#include "FaissAssert.h"</span></div>
|
|
<div class="line"><a name="l00034"></a><span class="lineno"> 34</span> </div>
|
|
<div class="line"><a name="l00035"></a><span class="lineno"> 35</span> </div>
|
|
<div class="line"><a name="l00036"></a><span class="lineno"> 36</span> </div>
|
|
<div class="line"><a name="l00037"></a><span class="lineno"> 37</span> <span class="preprocessor">#ifndef FINTEGER</span></div>
|
|
<div class="line"><a name="l00038"></a><span class="lineno"> 38</span> <span class="preprocessor"></span><span class="preprocessor">#define FINTEGER long</span></div>
|
|
<div class="line"><a name="l00039"></a><span class="lineno"> 39</span> <span class="preprocessor"></span><span class="preprocessor">#endif</span></div>
|
|
<div class="line"><a name="l00040"></a><span class="lineno"> 40</span> <span class="preprocessor"></span></div>
|
|
<div class="line"><a name="l00041"></a><span class="lineno"> 41</span> </div>
|
|
<div class="line"><a name="l00042"></a><span class="lineno"> 42</span> <span class="keyword">extern</span> <span class="stringliteral">"C"</span> {</div>
|
|
<div class="line"><a name="l00043"></a><span class="lineno"> 43</span> </div>
|
|
<div class="line"><a name="l00044"></a><span class="lineno"> 44</span> <span class="comment">/* declare BLAS functions, see http://www.netlib.org/clapack/cblas/ */</span></div>
|
|
<div class="line"><a name="l00045"></a><span class="lineno"> 45</span> </div>
|
|
<div class="line"><a name="l00046"></a><span class="lineno"> 46</span> <span class="keywordtype">int</span> sgemm_ (<span class="keyword">const</span> <span class="keywordtype">char</span> *transa, <span class="keyword">const</span> <span class="keywordtype">char</span> *transb, FINTEGER *m, FINTEGER *</div>
|
|
<div class="line"><a name="l00047"></a><span class="lineno"> 47</span>  n, FINTEGER *k, <span class="keyword">const</span> <span class="keywordtype">float</span> *alpha, <span class="keyword">const</span> <span class="keywordtype">float</span> *a,</div>
|
|
<div class="line"><a name="l00048"></a><span class="lineno"> 48</span>  FINTEGER *lda, <span class="keyword">const</span> <span class="keywordtype">float</span> *b, FINTEGER *</div>
|
|
<div class="line"><a name="l00049"></a><span class="lineno"> 49</span>  ldb, <span class="keywordtype">float</span> *beta, <span class="keywordtype">float</span> *c, FINTEGER *ldc);</div>
|
|
<div class="line"><a name="l00050"></a><span class="lineno"> 50</span> </div>
|
|
<div class="line"><a name="l00051"></a><span class="lineno"> 51</span> <span class="comment">/* Lapack functions, see http://www.netlib.org/clapack/old/single/sgeqrf.c */</span></div>
|
|
<div class="line"><a name="l00052"></a><span class="lineno"> 52</span> </div>
|
|
<div class="line"><a name="l00053"></a><span class="lineno"> 53</span> <span class="keywordtype">int</span> sgeqrf_ (FINTEGER *m, FINTEGER *n, <span class="keywordtype">float</span> *a, FINTEGER *lda,</div>
|
|
<div class="line"><a name="l00054"></a><span class="lineno"> 54</span>  <span class="keywordtype">float</span> *tau, <span class="keywordtype">float</span> *work, FINTEGER *lwork, FINTEGER *info);</div>
|
|
<div class="line"><a name="l00055"></a><span class="lineno"> 55</span> </div>
|
|
<div class="line"><a name="l00056"></a><span class="lineno"> 56</span> <span class="keywordtype">int</span> sorgqr_(FINTEGER *m, FINTEGER *n, FINTEGER *k, <span class="keywordtype">float</span> *a,</div>
|
|
<div class="line"><a name="l00057"></a><span class="lineno"> 57</span>  FINTEGER *lda, <span class="keywordtype">float</span> *tau, <span class="keywordtype">float</span> *work,</div>
|
|
<div class="line"><a name="l00058"></a><span class="lineno"> 58</span>  FINTEGER *lwork, FINTEGER *info);</div>
|
|
<div class="line"><a name="l00059"></a><span class="lineno"> 59</span> </div>
|
|
<div class="line"><a name="l00060"></a><span class="lineno"> 60</span> </div>
|
|
<div class="line"><a name="l00061"></a><span class="lineno"> 61</span> }</div>
|
|
<div class="line"><a name="l00062"></a><span class="lineno"> 62</span> </div>
|
|
<div class="line"><a name="l00063"></a><span class="lineno"> 63</span> </div>
|
|
<div class="line"><a name="l00064"></a><span class="lineno"> 64</span> <span class="comment">/**************************************************</span></div>
|
|
<div class="line"><a name="l00065"></a><span class="lineno"> 65</span> <span class="comment"> * Get some stats about the system</span></div>
|
|
<div class="line"><a name="l00066"></a><span class="lineno"> 66</span> <span class="comment"> **************************************************/</span></div>
|
|
<div class="line"><a name="l00067"></a><span class="lineno"> 67</span> </div>
|
|
<div class="line"><a name="l00068"></a><span class="lineno"> 68</span> <span class="keyword">namespace </span>faiss {</div>
|
|
<div class="line"><a name="l00069"></a><span class="lineno"> 69</span> </div>
|
|
<div class="line"><a name="l00070"></a><span class="lineno"> 70</span> <span class="preprocessor">#ifdef __AVX__</span></div>
|
|
<div class="line"><a name="l00071"></a><span class="lineno"> 71</span> <span class="preprocessor"></span><span class="preprocessor">#define USE_AVX</span></div>
|
|
<div class="line"><a name="l00072"></a><span class="lineno"> 72</span> <span class="preprocessor"></span><span class="preprocessor">#endif</span></div>
|
|
<div class="line"><a name="l00073"></a><span class="lineno"> 73</span> <span class="preprocessor"></span></div>
|
|
<div class="line"><a name="l00074"></a><span class="lineno"><a class="line" href="namespacefaiss.html#af2a71f7d5402ae02ce169a4cc83020eb"> 74</a></span> <span class="keywordtype">double</span> <a class="code" href="namespacefaiss.html#af2a71f7d5402ae02ce169a4cc83020eb">getmillisecs</a> () {</div>
|
|
<div class="line"><a name="l00075"></a><span class="lineno"> 75</span>  <span class="keyword">struct </span>timeval tv;</div>
|
|
<div class="line"><a name="l00076"></a><span class="lineno"> 76</span>  gettimeofday (&tv, <span class="keyword">nullptr</span>);</div>
|
|
<div class="line"><a name="l00077"></a><span class="lineno"> 77</span>  <span class="keywordflow">return</span> tv.tv_sec * 1e3 + tv.tv_usec * 1e-3;</div>
|
|
<div class="line"><a name="l00078"></a><span class="lineno"> 78</span> }</div>
|
|
<div class="line"><a name="l00079"></a><span class="lineno"> 79</span> </div>
|
|
<div class="line"><a name="l00080"></a><span class="lineno"> 80</span> </div>
|
|
<div class="line"><a name="l00081"></a><span class="lineno"> 81</span> <span class="preprocessor">#ifdef __linux__</span></div>
|
|
<div class="line"><a name="l00082"></a><span class="lineno"> 82</span> <span class="preprocessor"></span></div>
|
|
<div class="line"><a name="l00083"></a><span class="lineno"> 83</span> <span class="keywordtype">size_t</span> <a class="code" href="namespacefaiss.html#aa3af5769b0b649f112332a874c64d361">get_mem_usage_kb</a> ()</div>
|
|
<div class="line"><a name="l00084"></a><span class="lineno"> 84</span> {</div>
|
|
<div class="line"><a name="l00085"></a><span class="lineno"> 85</span>  <span class="keywordtype">int</span> pid = getpid ();</div>
|
|
<div class="line"><a name="l00086"></a><span class="lineno"> 86</span>  <span class="keywordtype">char</span> fname[256];</div>
|
|
<div class="line"><a name="l00087"></a><span class="lineno"> 87</span>  snprintf (fname, 256, <span class="stringliteral">"/proc/%d/status"</span>, pid);</div>
|
|
<div class="line"><a name="l00088"></a><span class="lineno"> 88</span>  FILE * f = fopen (fname, <span class="stringliteral">"r"</span>);</div>
|
|
<div class="line"><a name="l00089"></a><span class="lineno"> 89</span>  FAISS_THROW_IF_NOT_MSG (f, <span class="stringliteral">"cannot open proc status file"</span>);</div>
|
|
<div class="line"><a name="l00090"></a><span class="lineno"> 90</span>  <span class="keywordtype">size_t</span> sz = 0;</div>
|
|
<div class="line"><a name="l00091"></a><span class="lineno"> 91</span>  <span class="keywordflow">for</span> (;;) {</div>
|
|
<div class="line"><a name="l00092"></a><span class="lineno"> 92</span>  <span class="keywordtype">char</span> buf [256];</div>
|
|
<div class="line"><a name="l00093"></a><span class="lineno"> 93</span>  <span class="keywordflow">if</span> (!fgets (buf, 256, f)) <span class="keywordflow">break</span>;</div>
|
|
<div class="line"><a name="l00094"></a><span class="lineno"> 94</span>  <span class="keywordflow">if</span> (sscanf (buf, <span class="stringliteral">"VmRSS: %ld kB"</span>, &sz) == 1) <span class="keywordflow">break</span>;</div>
|
|
<div class="line"><a name="l00095"></a><span class="lineno"> 95</span>  }</div>
|
|
<div class="line"><a name="l00096"></a><span class="lineno"> 96</span>  fclose (f);</div>
|
|
<div class="line"><a name="l00097"></a><span class="lineno"> 97</span>  <span class="keywordflow">return</span> sz;</div>
|
|
<div class="line"><a name="l00098"></a><span class="lineno"> 98</span> }</div>
|
|
<div class="line"><a name="l00099"></a><span class="lineno"> 99</span> </div>
|
|
<div class="line"><a name="l00100"></a><span class="lineno"> 100</span> <span class="preprocessor">#elif __APPLE__</span></div>
|
|
<div class="line"><a name="l00101"></a><span class="lineno"> 101</span> <span class="preprocessor"></span></div>
|
|
<div class="line"><a name="l00102"></a><span class="lineno"> 102</span> <span class="keywordtype">size_t</span> <a class="code" href="namespacefaiss.html#aa3af5769b0b649f112332a874c64d361">get_mem_usage_kb</a> ()</div>
|
|
<div class="line"><a name="l00103"></a><span class="lineno"> 103</span> {</div>
|
|
<div class="line"><a name="l00104"></a><span class="lineno"> 104</span>  fprintf(stderr, <span class="stringliteral">"WARN: get_mem_usage_kb not implemented on the mac\n"</span>);</div>
|
|
<div class="line"><a name="l00105"></a><span class="lineno"> 105</span>  <span class="keywordflow">return</span> 0;</div>
|
|
<div class="line"><a name="l00106"></a><span class="lineno"> 106</span> }</div>
|
|
<div class="line"><a name="l00107"></a><span class="lineno"> 107</span> </div>
|
|
<div class="line"><a name="l00108"></a><span class="lineno"> 108</span> <span class="preprocessor">#endif</span></div>
|
|
<div class="line"><a name="l00109"></a><span class="lineno"> 109</span> <span class="preprocessor"></span></div>
|
|
<div class="line"><a name="l00110"></a><span class="lineno"> 110</span> </div>
|
|
<div class="line"><a name="l00111"></a><span class="lineno"> 111</span> </div>
|
|
<div class="line"><a name="l00112"></a><span class="lineno"> 112</span> <span class="comment">/**************************************************</span></div>
|
|
<div class="line"><a name="l00113"></a><span class="lineno"> 113</span> <span class="comment"> * Random data generation functions</span></div>
|
|
<div class="line"><a name="l00114"></a><span class="lineno"> 114</span> <span class="comment"> **************************************************/</span></div>
|
|
<div class="line"><a name="l00115"></a><span class="lineno"> 115</span> <span class="comment"></span></div>
|
|
<div class="line"><a name="l00116"></a><span class="lineno"> 116</span> <span class="comment">/**</span></div>
|
|
<div class="line"><a name="l00117"></a><span class="lineno"> 117</span> <span class="comment"> * The definition of random functions depends on the architecture:</span></div>
|
|
<div class="line"><a name="l00118"></a><span class="lineno"> 118</span> <span class="comment"> *</span></div>
|
|
<div class="line"><a name="l00119"></a><span class="lineno"> 119</span> <span class="comment"> * - for Linux, we rely on re-entrant functions (random_r). This</span></div>
|
|
<div class="line"><a name="l00120"></a><span class="lineno"> 120</span> <span class="comment"> * provides good quality reproducible random sequences.</span></div>
|
|
<div class="line"><a name="l00121"></a><span class="lineno"> 121</span> <span class="comment"> *</span></div>
|
|
<div class="line"><a name="l00122"></a><span class="lineno"> 122</span> <span class="comment"> * - for Apple, we use rand_r. Apple is trying so hard to deprecate</span></div>
|
|
<div class="line"><a name="l00123"></a><span class="lineno"> 123</span> <span class="comment"> * this function that it removed its definition from stdlib.h, so we</span></div>
|
|
<div class="line"><a name="l00124"></a><span class="lineno"> 124</span> <span class="comment"> * re-declare it below. Fortunately, since it is deprecated, its</span></div>
|
|
<div class="line"><a name="l00125"></a><span class="lineno"> 125</span> <span class="comment"> * prototype should not change much in the foreseeable future.</span></div>
|
|
<div class="line"><a name="l00126"></a><span class="lineno"> 126</span> <span class="comment"> *</span></div>
|
|
<div class="line"><a name="l00127"></a><span class="lineno"> 127</span> <span class="comment"> * Unfortunately, system designers are more concerned with making the</span></div>
|
|
<div class="line"><a name="l00128"></a><span class="lineno"> 128</span> <span class="comment"> * most unpredictable random sequences for cryptographic use, when in</span></div>
|
|
<div class="line"><a name="l00129"></a><span class="lineno"> 129</span> <span class="comment"> * scientific contexts what actually matters is having reproducible</span></div>
|
|
<div class="line"><a name="l00130"></a><span class="lineno"> 130</span> <span class="comment"> * sequences in multi-threaded contexts.</span></div>
|
|
<div class="line"><a name="l00131"></a><span class="lineno"> 131</span> <span class="comment"> */</span></div>
|
|
<div class="line"><a name="l00132"></a><span class="lineno"> 132</span> </div>
|
|
<div class="line"><a name="l00133"></a><span class="lineno"> 133</span> </div>
|
|
<div class="line"><a name="l00134"></a><span class="lineno"> 134</span> <span class="preprocessor">#ifdef __linux__</span></div>
|
|
<div class="line"><a name="l00135"></a><span class="lineno"> 135</span> <span class="preprocessor"></span></div>
|
|
<div class="line"><a name="l00136"></a><span class="lineno"> 136</span> </div>
|
|
<div class="line"><a name="l00137"></a><span class="lineno"> 137</span> </div>
|
|
<div class="line"><a name="l00138"></a><span class="lineno"> 138</span> </div>
|
|
<div class="line"><a name="l00139"></a><span class="lineno"> 139</span> <span class="keywordtype">int</span> <a class="code" href="structfaiss_1_1RandomGenerator.html#a583f124ecacdbe037ac96e23a44dd420">RandomGenerator::rand_int</a> ()</div>
|
|
<div class="line"><a name="l00140"></a><span class="lineno"> 140</span> {</div>
|
|
<div class="line"><a name="l00141"></a><span class="lineno"> 141</span>  int32_t a;</div>
|
|
<div class="line"><a name="l00142"></a><span class="lineno"> 142</span>  random_r (&rand_data, &a);</div>
|
|
<div class="line"><a name="l00143"></a><span class="lineno"> 143</span>  <span class="keywordflow">return</span> a;</div>
|
|
<div class="line"><a name="l00144"></a><span class="lineno"> 144</span> }</div>
|
|
<div class="line"><a name="l00145"></a><span class="lineno"> 145</span> </div>
|
|
<div class="line"><a name="l00146"></a><span class="lineno"> 146</span> <span class="keywordtype">long</span> <a class="code" href="structfaiss_1_1RandomGenerator.html#acbceaa8b017793ca4f8d90e644b0d7f4">RandomGenerator::rand_long</a> ()</div>
|
|
<div class="line"><a name="l00147"></a><span class="lineno"> 147</span> {</div>
|
|
<div class="line"><a name="l00148"></a><span class="lineno"> 148</span>  int32_t a, b;</div>
|
|
<div class="line"><a name="l00149"></a><span class="lineno"> 149</span>  random_r (&rand_data, &a);</div>
|
|
<div class="line"><a name="l00150"></a><span class="lineno"> 150</span>  random_r (&rand_data, &b);</div>
|
|
<div class="line"><a name="l00151"></a><span class="lineno"> 151</span>  <span class="keywordflow">return</span> long(a) | long(b) << 31;</div>
|
|
<div class="line"><a name="l00152"></a><span class="lineno"> 152</span> }</div>
|
|
<div class="line"><a name="l00153"></a><span class="lineno"> 153</span> </div>
|
|
<div class="line"><a name="l00154"></a><span class="lineno"> 154</span> </div>
|
|
<div class="line"><a name="l00155"></a><span class="lineno"> 155</span> <a class="code" href="structfaiss_1_1RandomGenerator.html#a7633c373153f3b2824d2d99382ba20ab">RandomGenerator::RandomGenerator</a> (<span class="keywordtype">long</span> seed)</div>
|
|
<div class="line"><a name="l00156"></a><span class="lineno"> 156</span> {</div>
|
|
<div class="line"><a name="l00157"></a><span class="lineno"> 157</span>  memset (&rand_data, 0, <span class="keyword">sizeof</span> (rand_data));</div>
|
|
<div class="line"><a name="l00158"></a><span class="lineno"> 158</span>  initstate_r (seed, rand_state, <span class="keyword">sizeof</span> (rand_state), &rand_data);</div>
|
|
<div class="line"><a name="l00159"></a><span class="lineno"> 159</span> }</div>
|
|
<div class="line"><a name="l00160"></a><span class="lineno"> 160</span> </div>
|
|
<div class="line"><a name="l00161"></a><span class="lineno"> 161</span> </div>
|
|
<div class="line"><a name="l00162"></a><span class="lineno"> 162</span> <a class="code" href="structfaiss_1_1RandomGenerator.html#a7633c373153f3b2824d2d99382ba20ab">RandomGenerator::RandomGenerator</a> (<span class="keyword">const</span> RandomGenerator & other)</div>
|
|
<div class="line"><a name="l00163"></a><span class="lineno"> 163</span> {</div>
|
|
<div class="line"><a name="l00164"></a><span class="lineno"> 164</span>  memcpy (rand_state, other.rand_state, <span class="keyword">sizeof</span>(rand_state));</div>
|
|
<div class="line"><a name="l00165"></a><span class="lineno"> 165</span>  rand_data = other.rand_data;</div>
|
|
<div class="line"><a name="l00166"></a><span class="lineno"> 166</span>  setstate_r (rand_state, &rand_data);</div>
|
|
<div class="line"><a name="l00167"></a><span class="lineno"> 167</span> }</div>
|
|
<div class="line"><a name="l00168"></a><span class="lineno"> 168</span> </div>
|
|
<div class="line"><a name="l00169"></a><span class="lineno"> 169</span> </div>
|
|
<div class="line"><a name="l00170"></a><span class="lineno"> 170</span> <span class="preprocessor">#elif __APPLE__</span></div>
|
|
<div class="line"><a name="l00171"></a><span class="lineno"> 171</span> <span class="preprocessor"></span></div>
|
|
<div class="line"><a name="l00172"></a><span class="lineno"> 172</span> <span class="keyword">extern</span> <span class="stringliteral">"C"</span> {</div>
|
|
<div class="line"><a name="l00173"></a><span class="lineno"> 173</span> <span class="keywordtype">int</span> rand_r(<span class="keywordtype">unsigned</span> *seed);</div>
|
|
<div class="line"><a name="l00174"></a><span class="lineno"> 174</span> }</div>
|
|
<div class="line"><a name="l00175"></a><span class="lineno"> 175</span> </div>
|
|
<div class="line"><a name="l00176"></a><span class="lineno"> 176</span> <a class="code" href="structfaiss_1_1RandomGenerator.html#a7633c373153f3b2824d2d99382ba20ab">RandomGenerator::RandomGenerator</a> (<span class="keywordtype">long</span> seed)</div>
|
|
<div class="line"><a name="l00177"></a><span class="lineno"> 177</span> {</div>
|
|
<div class="line"><a name="l00178"></a><span class="lineno"> 178</span>  rand_state = seed;</div>
|
|
<div class="line"><a name="l00179"></a><span class="lineno"> 179</span> }</div>
|
|
<div class="line"><a name="l00180"></a><span class="lineno"> 180</span> </div>
|
|
<div class="line"><a name="l00181"></a><span class="lineno"> 181</span> </div>
|
|
<div class="line"><a name="l00182"></a><span class="lineno"> 182</span> <a class="code" href="structfaiss_1_1RandomGenerator.html#a7633c373153f3b2824d2d99382ba20ab">RandomGenerator::RandomGenerator</a> (<span class="keyword">const</span> RandomGenerator & other)</div>
|
|
<div class="line"><a name="l00183"></a><span class="lineno"> 183</span> {</div>
|
|
<div class="line"><a name="l00184"></a><span class="lineno"> 184</span>  rand_state = other.rand_state;</div>
|
|
<div class="line"><a name="l00185"></a><span class="lineno"> 185</span> }</div>
|
|
<div class="line"><a name="l00186"></a><span class="lineno"> 186</span> </div>
|
|
<div class="line"><a name="l00187"></a><span class="lineno"> 187</span> </div>
|
|
<div class="line"><a name="l00188"></a><span class="lineno"> 188</span> <span class="keywordtype">int</span> <a class="code" href="structfaiss_1_1RandomGenerator.html#a583f124ecacdbe037ac96e23a44dd420">RandomGenerator::rand_int</a> ()</div>
|
|
<div class="line"><a name="l00189"></a><span class="lineno"> 189</span> {</div>
|
|
<div class="line"><a name="l00190"></a><span class="lineno"> 190</span>  <span class="comment">// RAND_MAX is 31 bits</span></div>
|
|
<div class="line"><a name="l00191"></a><span class="lineno"> 191</span>  <span class="comment">// try to add more randomness in the lower bits</span></div>
|
|
<div class="line"><a name="l00192"></a><span class="lineno"> 192</span>  <span class="keywordtype">int</span> lowbits = rand_r(&rand_state) >> 15;</div>
|
|
<div class="line"><a name="l00193"></a><span class="lineno"> 193</span>  <span class="keywordflow">return</span> rand_r(&rand_state) ^ lowbits;</div>
|
|
<div class="line"><a name="l00194"></a><span class="lineno"> 194</span> }</div>
|
|
<div class="line"><a name="l00195"></a><span class="lineno"> 195</span> </div>
|
|
<div class="line"><a name="l00196"></a><span class="lineno"> 196</span> <span class="keywordtype">long</span> <a class="code" href="structfaiss_1_1RandomGenerator.html#acbceaa8b017793ca4f8d90e644b0d7f4">RandomGenerator::rand_long</a> ()</div>
|
|
<div class="line"><a name="l00197"></a><span class="lineno"> 197</span> {</div>
|
|
<div class="line"><a name="l00198"></a><span class="lineno"> 198</span>  <span class="keywordflow">return</span> long(random()) | long(random()) << 31;</div>
|
|
<div class="line"><a name="l00199"></a><span class="lineno"> 199</span> }</div>
|
|
<div class="line"><a name="l00200"></a><span class="lineno"> 200</span> </div>
|
|
<div class="line"><a name="l00201"></a><span class="lineno"> 201</span> </div>
|
|
<div class="line"><a name="l00202"></a><span class="lineno"> 202</span> </div>
|
|
<div class="line"><a name="l00203"></a><span class="lineno"> 203</span> <span class="preprocessor">#endif</span></div>
|
|
<div class="line"><a name="l00204"></a><span class="lineno"> 204</span> <span class="preprocessor"></span></div>
|
|
<div class="line"><a name="l00205"></a><span class="lineno"><a class="line" href="structfaiss_1_1RandomGenerator.html#aa460b9bac2593cd1d71305e10ab51af6"> 205</a></span> <span class="keywordtype">int</span> <a class="code" href="structfaiss_1_1RandomGenerator.html#a583f124ecacdbe037ac96e23a44dd420">RandomGenerator::rand_int</a> (<span class="keywordtype">int</span> max)</div>
|
|
<div class="line"><a name="l00206"></a><span class="lineno"> 206</span> { <span class="comment">// this suffers from non-uniform probabilities when max is not a</span></div>
|
|
<div class="line"><a name="l00207"></a><span class="lineno"> 207</span>  <span class="comment">// power of 2, but if RAND_MAX >> max the bias is limited.</span></div>
|
|
<div class="line"><a name="l00208"></a><span class="lineno"> 208</span>  <span class="keywordflow">return</span> <a class="code" href="structfaiss_1_1RandomGenerator.html#a583f124ecacdbe037ac96e23a44dd420">rand_int</a> () % max;</div>
|
|
<div class="line"><a name="l00209"></a><span class="lineno"> 209</span> }</div>
|
|
<div class="line"><a name="l00210"></a><span class="lineno"> 210</span> </div>
|
|
<div class="line"><a name="l00211"></a><span class="lineno"><a class="line" href="structfaiss_1_1RandomGenerator.html#ac82a433d7bfa56d750907ba5cf74aed7"> 211</a></span> <span class="keywordtype">float</span> <a class="code" href="structfaiss_1_1RandomGenerator.html#ac82a433d7bfa56d750907ba5cf74aed7">RandomGenerator::rand_float</a> ()</div>
|
|
<div class="line"><a name="l00212"></a><span class="lineno"> 212</span> {</div>
|
|
<div class="line"><a name="l00213"></a><span class="lineno"> 213</span>  <span class="keywordflow">return</span> <a class="code" href="structfaiss_1_1RandomGenerator.html#a583f124ecacdbe037ac96e23a44dd420">rand_int</a>() / float(1L << 31);</div>
|
|
<div class="line"><a name="l00214"></a><span class="lineno"> 214</span> }</div>
|
|
<div class="line"><a name="l00215"></a><span class="lineno"> 215</span> </div>
|
|
<div class="line"><a name="l00216"></a><span class="lineno"> 216</span> <span class="keywordtype">double</span> RandomGenerator::rand_double ()</div>
|
|
<div class="line"><a name="l00217"></a><span class="lineno"> 217</span> {</div>
|
|
<div class="line"><a name="l00218"></a><span class="lineno"> 218</span>  <span class="keywordflow">return</span> <a class="code" href="structfaiss_1_1RandomGenerator.html#acbceaa8b017793ca4f8d90e644b0d7f4">rand_long</a>() / double(1L << 62);</div>
|
|
<div class="line"><a name="l00219"></a><span class="lineno"> 219</span> }</div>
|
|
<div class="line"><a name="l00220"></a><span class="lineno"> 220</span> </div>
|
|
<div class="line"><a name="l00221"></a><span class="lineno"> 221</span> </div>
|
|
<div class="line"><a name="l00222"></a><span class="lineno"> 222</span> <span class="comment">/***********************************************************************</span></div>
|
|
<div class="line"><a name="l00223"></a><span class="lineno"> 223</span> <span class="comment"> * Random functions in this C file only exist because Torch</span></div>
|
|
<div class="line"><a name="l00224"></a><span class="lineno"> 224</span> <span class="comment"> * counterparts are slow and not multi-threaded. Typical use is for</span></div>
|
|
<div class="line"><a name="l00225"></a><span class="lineno"> 225</span> <span class="comment"> * more than 1-100 billion values. */</span></div>
|
|
<div class="line"><a name="l00226"></a><span class="lineno"> 226</span> </div>
|
|
<div class="line"><a name="l00227"></a><span class="lineno"> 227</span> </div>
|
|
<div class="line"><a name="l00228"></a><span class="lineno"> 228</span> <span class="comment">/* Generate a set of random floating point values such that x[i] in [0,1]</span></div>
|
|
<div class="line"><a name="l00229"></a><span class="lineno"> 229</span> <span class="comment"> multi-threading. For this reason, we rely on re-entrant functions. */</span></div>
|
|
<div class="line"><a name="l00230"></a><span class="lineno"> 230</span> <span class="keywordtype">void</span> float_rand (<span class="keywordtype">float</span> * x, <span class="keywordtype">size_t</span> n, <span class="keywordtype">long</span> seed)</div>
|
|
<div class="line"><a name="l00231"></a><span class="lineno"> 231</span> {</div>
|
|
<div class="line"><a name="l00232"></a><span class="lineno"> 232</span>  <span class="comment">// only try to parallelize on large enough arrays</span></div>
|
|
<div class="line"><a name="l00233"></a><span class="lineno"> 233</span>  <span class="keyword">const</span> <span class="keywordtype">size_t</span> nblock = n < 1024 ? 1 : 1024;</div>
|
|
<div class="line"><a name="l00234"></a><span class="lineno"> 234</span> </div>
|
|
<div class="line"><a name="l00235"></a><span class="lineno"> 235</span>  RandomGenerator rng0 (seed);</div>
|
|
<div class="line"><a name="l00236"></a><span class="lineno"> 236</span>  <span class="keywordtype">int</span> a0 = rng0.rand_int (), b0 = rng0.rand_int ();</div>
|
|
<div class="line"><a name="l00237"></a><span class="lineno"> 237</span> </div>
|
|
<div class="line"><a name="l00238"></a><span class="lineno"> 238</span> <span class="preprocessor">#pragma omp parallel for</span></div>
|
|
<div class="line"><a name="l00239"></a><span class="lineno"> 239</span> <span class="preprocessor"></span> <span class="keywordflow">for</span> (<span class="keywordtype">size_t</span> j = 0; j < nblock; j++) {</div>
|
|
<div class="line"><a name="l00240"></a><span class="lineno"> 240</span> </div>
|
|
<div class="line"><a name="l00241"></a><span class="lineno"> 241</span>  RandomGenerator rng (a0 + j * b0);</div>
|
|
<div class="line"><a name="l00242"></a><span class="lineno"> 242</span> </div>
|
|
<div class="line"><a name="l00243"></a><span class="lineno"> 243</span>  <span class="keyword">const</span> <span class="keywordtype">size_t</span> istart = j * n / nblock;</div>
|
|
<div class="line"><a name="l00244"></a><span class="lineno"> 244</span>  <span class="keyword">const</span> <span class="keywordtype">size_t</span> iend = (j + 1) * n / nblock;</div>
|
|
<div class="line"><a name="l00245"></a><span class="lineno"> 245</span> </div>
|
|
<div class="line"><a name="l00246"></a><span class="lineno"> 246</span>  <span class="keywordflow">for</span> (<span class="keywordtype">size_t</span> i = istart; i < iend; i++)</div>
|
|
<div class="line"><a name="l00247"></a><span class="lineno"> 247</span>  x[i] = rng.rand_float ();</div>
|
|
<div class="line"><a name="l00248"></a><span class="lineno"> 248</span>  }</div>
|
|
<div class="line"><a name="l00249"></a><span class="lineno"> 249</span> }</div>
|
|
<div class="line"><a name="l00250"></a><span class="lineno"> 250</span> </div>
|
|
<div class="line"><a name="l00251"></a><span class="lineno"> 251</span> </div>
|
|
<div class="line"><a name="l00252"></a><span class="lineno"> 252</span> <span class="keywordtype">void</span> float_randn (<span class="keywordtype">float</span> * x, <span class="keywordtype">size_t</span> n, <span class="keywordtype">long</span> seed)</div>
|
|
<div class="line"><a name="l00253"></a><span class="lineno"> 253</span> {</div>
|
|
<div class="line"><a name="l00254"></a><span class="lineno"> 254</span>  <span class="comment">// only try to parallelize on large enough arrays</span></div>
|
|
<div class="line"><a name="l00255"></a><span class="lineno"> 255</span>  <span class="keyword">const</span> <span class="keywordtype">size_t</span> nblock = n < 1024 ? 1 : 1024;</div>
|
|
<div class="line"><a name="l00256"></a><span class="lineno"> 256</span> </div>
|
|
<div class="line"><a name="l00257"></a><span class="lineno"> 257</span>  RandomGenerator rng0 (seed);</div>
|
|
<div class="line"><a name="l00258"></a><span class="lineno"> 258</span>  <span class="keywordtype">int</span> a0 = rng0.rand_int (), b0 = rng0.rand_int ();</div>
|
|
<div class="line"><a name="l00259"></a><span class="lineno"> 259</span> </div>
|
|
<div class="line"><a name="l00260"></a><span class="lineno"> 260</span> <span class="preprocessor">#pragma omp parallel for</span></div>
|
|
<div class="line"><a name="l00261"></a><span class="lineno"> 261</span> <span class="preprocessor"></span> <span class="keywordflow">for</span> (<span class="keywordtype">size_t</span> j = 0; j < nblock; j++) {</div>
|
|
<div class="line"><a name="l00262"></a><span class="lineno"> 262</span>  RandomGenerator rng (a0 + j * b0);</div>
|
|
<div class="line"><a name="l00263"></a><span class="lineno"> 263</span> </div>
|
|
<div class="line"><a name="l00264"></a><span class="lineno"> 264</span>  <span class="keywordtype">double</span> a = 0, b = 0, s = 0;</div>
|
|
<div class="line"><a name="l00265"></a><span class="lineno"> 265</span>  <span class="keywordtype">int</span> state = 0; <span class="comment">/* generate two numbers per "do-while" loop */</span></div>
|
|
<div class="line"><a name="l00266"></a><span class="lineno"> 266</span> </div>
|
|
<div class="line"><a name="l00267"></a><span class="lineno"> 267</span>  <span class="keyword">const</span> <span class="keywordtype">size_t</span> istart = j * n / nblock;</div>
|
|
<div class="line"><a name="l00268"></a><span class="lineno"> 268</span>  <span class="keyword">const</span> <span class="keywordtype">size_t</span> iend = (j + 1) * n / nblock;</div>
|
|
<div class="line"><a name="l00269"></a><span class="lineno"> 269</span> </div>
|
|
<div class="line"><a name="l00270"></a><span class="lineno"> 270</span>  <span class="keywordflow">for</span> (<span class="keywordtype">size_t</span> i = istart; i < iend; i++) {</div>
|
|
<div class="line"><a name="l00271"></a><span class="lineno"> 271</span>  <span class="comment">/* Marsaglia's method (see Knuth) */</span></div>
|
|
<div class="line"><a name="l00272"></a><span class="lineno"> 272</span>  <span class="keywordflow">if</span> (state == 0) {</div>
|
|
<div class="line"><a name="l00273"></a><span class="lineno"> 273</span>  <span class="keywordflow">do</span> {</div>
|
|
<div class="line"><a name="l00274"></a><span class="lineno"> 274</span>  a = 2.0 * rng.rand_double () - 1;</div>
|
|
<div class="line"><a name="l00275"></a><span class="lineno"> 275</span>  b = 2.0 * rng.rand_double () - 1;</div>
|
|
<div class="line"><a name="l00276"></a><span class="lineno"> 276</span>  s = a * a + b * b;</div>
|
|
<div class="line"><a name="l00277"></a><span class="lineno"> 277</span>  } <span class="keywordflow">while</span> (s >= 1.0);</div>
|
|
<div class="line"><a name="l00278"></a><span class="lineno"> 278</span>  x[i] = a * sqrt(-2.0 * log(s) / s);</div>
|
|
<div class="line"><a name="l00279"></a><span class="lineno"> 279</span>  }</div>
|
|
<div class="line"><a name="l00280"></a><span class="lineno"> 280</span>  <span class="keywordflow">else</span></div>
|
|
<div class="line"><a name="l00281"></a><span class="lineno"> 281</span>  x[i] = b * sqrt(-2.0 * log(s) / s);</div>
|
|
<div class="line"><a name="l00282"></a><span class="lineno"> 282</span>  state = 1 - state;</div>
|
|
<div class="line"><a name="l00283"></a><span class="lineno"> 283</span>  }</div>
|
|
<div class="line"><a name="l00284"></a><span class="lineno"> 284</span>  }</div>
|
|
<div class="line"><a name="l00285"></a><span class="lineno"> 285</span> }</div>
|
|
<div class="line"><a name="l00286"></a><span class="lineno"> 286</span> </div>
|
|
<div class="line"><a name="l00287"></a><span class="lineno"> 287</span> </div>
|
|
<div class="line"><a name="l00288"></a><span class="lineno"> 288</span> <span class="comment">/* Integer versions */</span></div>
|
|
<div class="line"><a name="l00289"></a><span class="lineno"> 289</span> <span class="keywordtype">void</span> long_rand (<span class="keywordtype">long</span> * x, <span class="keywordtype">size_t</span> n, <span class="keywordtype">long</span> seed)</div>
|
|
<div class="line"><a name="l00290"></a><span class="lineno"> 290</span> {</div>
|
|
<div class="line"><a name="l00291"></a><span class="lineno"> 291</span>  <span class="comment">// only try to parallelize on large enough arrays</span></div>
|
|
<div class="line"><a name="l00292"></a><span class="lineno"> 292</span>  <span class="keyword">const</span> <span class="keywordtype">size_t</span> nblock = n < 1024 ? 1 : 1024;</div>
|
|
<div class="line"><a name="l00293"></a><span class="lineno"> 293</span> </div>
|
|
<div class="line"><a name="l00294"></a><span class="lineno"> 294</span>  RandomGenerator rng0 (seed);</div>
|
|
<div class="line"><a name="l00295"></a><span class="lineno"> 295</span>  <span class="keywordtype">int</span> a0 = rng0.rand_int (), b0 = rng0.rand_int ();</div>
|
|
<div class="line"><a name="l00296"></a><span class="lineno"> 296</span> </div>
|
|
<div class="line"><a name="l00297"></a><span class="lineno"> 297</span> <span class="preprocessor">#pragma omp parallel for</span></div>
|
|
<div class="line"><a name="l00298"></a><span class="lineno"> 298</span> <span class="preprocessor"></span> <span class="keywordflow">for</span> (<span class="keywordtype">size_t</span> j = 0; j < nblock; j++) {</div>
|
|
<div class="line"><a name="l00299"></a><span class="lineno"> 299</span> </div>
|
|
<div class="line"><a name="l00300"></a><span class="lineno"> 300</span>  RandomGenerator rng (a0 + j * b0);</div>
|
|
<div class="line"><a name="l00301"></a><span class="lineno"> 301</span> </div>
|
|
<div class="line"><a name="l00302"></a><span class="lineno"> 302</span>  <span class="keyword">const</span> <span class="keywordtype">size_t</span> istart = j * n / nblock;</div>
|
|
<div class="line"><a name="l00303"></a><span class="lineno"> 303</span>  <span class="keyword">const</span> <span class="keywordtype">size_t</span> iend = (j + 1) * n / nblock;</div>
|
|
<div class="line"><a name="l00304"></a><span class="lineno"> 304</span>  <span class="keywordflow">for</span> (<span class="keywordtype">size_t</span> i = istart; i < iend; i++)</div>
|
|
<div class="line"><a name="l00305"></a><span class="lineno"> 305</span>  x[i] = rng.rand_long ();</div>
|
|
<div class="line"><a name="l00306"></a><span class="lineno"> 306</span>  }</div>
|
|
<div class="line"><a name="l00307"></a><span class="lineno"> 307</span> }</div>
|
|
<div class="line"><a name="l00308"></a><span class="lineno"> 308</span> </div>
|
|
<div class="line"><a name="l00309"></a><span class="lineno"> 309</span> </div>
|
|
<div class="line"><a name="l00310"></a><span class="lineno"> 310</span> </div>
|
|
<div class="line"><a name="l00311"></a><span class="lineno"> 311</span> <span class="keywordtype">void</span> rand_perm (<span class="keywordtype">int</span> *perm, <span class="keywordtype">size_t</span> n, <span class="keywordtype">long</span> seed)</div>
|
|
<div class="line"><a name="l00312"></a><span class="lineno"> 312</span> {</div>
|
|
<div class="line"><a name="l00313"></a><span class="lineno"> 313</span>  <span class="keywordflow">for</span> (<span class="keywordtype">size_t</span> i = 0; i < n; i++) perm[i] = i;</div>
|
|
<div class="line"><a name="l00314"></a><span class="lineno"> 314</span> </div>
|
|
<div class="line"><a name="l00315"></a><span class="lineno"> 315</span>  RandomGenerator rng (seed);</div>
|
|
<div class="line"><a name="l00316"></a><span class="lineno"> 316</span> </div>
|
|
<div class="line"><a name="l00317"></a><span class="lineno"> 317</span>  <span class="keywordflow">for</span> (<span class="keywordtype">size_t</span> i = 0; i + 1 < n; i++) {</div>
|
|
<div class="line"><a name="l00318"></a><span class="lineno"> 318</span>  <span class="keywordtype">int</span> i2 = i + rng.rand_int (n - i);</div>
|
|
<div class="line"><a name="l00319"></a><span class="lineno"> 319</span>  std::swap(perm[i], perm[i2]);</div>
|
|
<div class="line"><a name="l00320"></a><span class="lineno"> 320</span>  }</div>
|
|
<div class="line"><a name="l00321"></a><span class="lineno"> 321</span> }</div>
|
|
<div class="line"><a name="l00322"></a><span class="lineno"> 322</span> </div>
|
|
<div class="line"><a name="l00323"></a><span class="lineno"> 323</span> </div>
|
|
<div class="line"><a name="l00324"></a><span class="lineno"> 324</span> </div>
|
|
<div class="line"><a name="l00325"></a><span class="lineno"> 325</span> </div>
|
|
<div class="line"><a name="l00326"></a><span class="lineno"> 326</span> <span class="keywordtype">void</span> byte_rand (uint8_t * x, <span class="keywordtype">size_t</span> n, <span class="keywordtype">long</span> seed)</div>
|
|
<div class="line"><a name="l00327"></a><span class="lineno"> 327</span> {</div>
|
|
<div class="line"><a name="l00328"></a><span class="lineno"> 328</span>  <span class="comment">// only try to parallelize on large enough arrays</span></div>
|
|
<div class="line"><a name="l00329"></a><span class="lineno"> 329</span>  <span class="keyword">const</span> <span class="keywordtype">size_t</span> nblock = n < 1024 ? 1 : 1024;</div>
|
|
<div class="line"><a name="l00330"></a><span class="lineno"> 330</span> </div>
|
|
<div class="line"><a name="l00331"></a><span class="lineno"> 331</span>  RandomGenerator rng0 (seed);</div>
|
|
<div class="line"><a name="l00332"></a><span class="lineno"> 332</span>  <span class="keywordtype">int</span> a0 = rng0.rand_int (), b0 = rng0.rand_int ();</div>
|
|
<div class="line"><a name="l00333"></a><span class="lineno"> 333</span> </div>
|
|
<div class="line"><a name="l00334"></a><span class="lineno"> 334</span> <span class="preprocessor">#pragma omp parallel for</span></div>
|
|
<div class="line"><a name="l00335"></a><span class="lineno"> 335</span> <span class="preprocessor"></span> <span class="keywordflow">for</span> (<span class="keywordtype">size_t</span> j = 0; j < nblock; j++) {</div>
|
|
<div class="line"><a name="l00336"></a><span class="lineno"> 336</span> </div>
|
|
<div class="line"><a name="l00337"></a><span class="lineno"> 337</span>  RandomGenerator rng (a0 + j * b0);</div>
|
|
<div class="line"><a name="l00338"></a><span class="lineno"> 338</span> </div>
|
|
<div class="line"><a name="l00339"></a><span class="lineno"> 339</span>  <span class="keyword">const</span> <span class="keywordtype">size_t</span> istart = j * n / nblock;</div>
|
|
<div class="line"><a name="l00340"></a><span class="lineno"> 340</span>  <span class="keyword">const</span> <span class="keywordtype">size_t</span> iend = (j + 1) * n / nblock;</div>
|
|
<div class="line"><a name="l00341"></a><span class="lineno"> 341</span> </div>
|
|
<div class="line"><a name="l00342"></a><span class="lineno"> 342</span>  <span class="keywordtype">size_t</span> i;</div>
|
|
<div class="line"><a name="l00343"></a><span class="lineno"> 343</span>  <span class="keywordflow">for</span> (i = istart; i < iend; i++)</div>
|
|
<div class="line"><a name="l00344"></a><span class="lineno"> 344</span>  x[i] = rng.rand_long ();</div>
|
|
<div class="line"><a name="l00345"></a><span class="lineno"> 345</span>  }</div>
|
|
<div class="line"><a name="l00346"></a><span class="lineno"> 346</span> }</div>
|
|
<div class="line"><a name="l00347"></a><span class="lineno"> 347</span> </div>
|
|
<div class="line"><a name="l00348"></a><span class="lineno"> 348</span> </div>
|
|
<div class="line"><a name="l00349"></a><span class="lineno"> 349</span> </div>
|
|
<div class="line"><a name="l00350"></a><span class="lineno"> 350</span> <span class="keywordtype">void</span> reflection (<span class="keyword">const</span> <span class="keywordtype">float</span> * __restrict u,</div>
|
|
<div class="line"><a name="l00351"></a><span class="lineno"> 351</span>  <span class="keywordtype">float</span> * __restrict x,</div>
|
|
<div class="line"><a name="l00352"></a><span class="lineno"> 352</span>  <span class="keywordtype">size_t</span> n, <span class="keywordtype">size_t</span> d, <span class="keywordtype">size_t</span> nu)</div>
|
|
<div class="line"><a name="l00353"></a><span class="lineno"> 353</span> {</div>
|
|
<div class="line"><a name="l00354"></a><span class="lineno"> 354</span>  <span class="keywordtype">size_t</span> i, j, l;</div>
|
|
<div class="line"><a name="l00355"></a><span class="lineno"> 355</span>  <span class="keywordflow">for</span> (i = 0; i < n; i++) {</div>
|
|
<div class="line"><a name="l00356"></a><span class="lineno"> 356</span>  <span class="keyword">const</span> <span class="keywordtype">float</span> * up = u;</div>
|
|
<div class="line"><a name="l00357"></a><span class="lineno"> 357</span>  <span class="keywordflow">for</span> (l = 0; l < nu; l++) {</div>
|
|
<div class="line"><a name="l00358"></a><span class="lineno"> 358</span>  <span class="keywordtype">float</span> ip1 = 0, ip2 = 0;</div>
|
|
<div class="line"><a name="l00359"></a><span class="lineno"> 359</span> </div>
|
|
<div class="line"><a name="l00360"></a><span class="lineno"> 360</span>  <span class="keywordflow">for</span> (j = 0; j < d; j+=2) {</div>
|
|
<div class="line"><a name="l00361"></a><span class="lineno"> 361</span>  ip1 += up[j] * x[j];</div>
|
|
<div class="line"><a name="l00362"></a><span class="lineno"> 362</span>  ip2 += up[j+1] * x[j+1];</div>
|
|
<div class="line"><a name="l00363"></a><span class="lineno"> 363</span>  }</div>
|
|
<div class="line"><a name="l00364"></a><span class="lineno"> 364</span>  <span class="keywordtype">float</span> ip = 2 * (ip1 + ip2);</div>
|
|
<div class="line"><a name="l00365"></a><span class="lineno"> 365</span> </div>
|
|
<div class="line"><a name="l00366"></a><span class="lineno"> 366</span>  <span class="keywordflow">for</span> (j = 0; j < d; j++)</div>
|
|
<div class="line"><a name="l00367"></a><span class="lineno"> 367</span>  x[j] -= ip * up[j];</div>
|
|
<div class="line"><a name="l00368"></a><span class="lineno"> 368</span>  up += d;</div>
|
|
<div class="line"><a name="l00369"></a><span class="lineno"> 369</span>  }</div>
|
|
<div class="line"><a name="l00370"></a><span class="lineno"> 370</span>  x += d;</div>
|
|
<div class="line"><a name="l00371"></a><span class="lineno"> 371</span>  }</div>
|
|
<div class="line"><a name="l00372"></a><span class="lineno"> 372</span> }</div>
|
|
<div class="line"><a name="l00373"></a><span class="lineno"> 373</span> </div>
|
|
<div class="line"><a name="l00374"></a><span class="lineno"> 374</span> </div>
|
|
<div class="line"><a name="l00375"></a><span class="lineno"> 375</span> <span class="comment">/* Reference implementation (slower) */</span></div>
|
|
<div class="line"><a name="l00376"></a><span class="lineno"> 376</span> <span class="keywordtype">void</span> reflection_ref (<span class="keyword">const</span> <span class="keywordtype">float</span> * u, <span class="keywordtype">float</span> * x, <span class="keywordtype">size_t</span> n, <span class="keywordtype">size_t</span> d, <span class="keywordtype">size_t</span> nu)</div>
|
|
<div class="line"><a name="l00377"></a><span class="lineno"> 377</span> {</div>
|
|
<div class="line"><a name="l00378"></a><span class="lineno"> 378</span>  <span class="keywordtype">size_t</span> i, j, l;</div>
|
|
<div class="line"><a name="l00379"></a><span class="lineno"> 379</span>  <span class="keywordflow">for</span> (i = 0; i < n; i++) {</div>
|
|
<div class="line"><a name="l00380"></a><span class="lineno"> 380</span>  <span class="keyword">const</span> <span class="keywordtype">float</span> * up = u;</div>
|
|
<div class="line"><a name="l00381"></a><span class="lineno"> 381</span>  <span class="keywordflow">for</span> (l = 0; l < nu; l++) {</div>
|
|
<div class="line"><a name="l00382"></a><span class="lineno"> 382</span>  <span class="keywordtype">double</span> ip = 0;</div>
|
|
<div class="line"><a name="l00383"></a><span class="lineno"> 383</span> </div>
|
|
<div class="line"><a name="l00384"></a><span class="lineno"> 384</span>  <span class="keywordflow">for</span> (j = 0; j < d; j++)</div>
|
|
<div class="line"><a name="l00385"></a><span class="lineno"> 385</span>  ip += up[j] * x[j];</div>
|
|
<div class="line"><a name="l00386"></a><span class="lineno"> 386</span>  ip *= 2;</div>
|
|
<div class="line"><a name="l00387"></a><span class="lineno"> 387</span> </div>
|
|
<div class="line"><a name="l00388"></a><span class="lineno"> 388</span>  <span class="keywordflow">for</span> (j = 0; j < d; j++)</div>
|
|
<div class="line"><a name="l00389"></a><span class="lineno"> 389</span>  x[j] -= ip * up[j];</div>
|
|
<div class="line"><a name="l00390"></a><span class="lineno"> 390</span> </div>
|
|
<div class="line"><a name="l00391"></a><span class="lineno"> 391</span>  up += d;</div>
|
|
<div class="line"><a name="l00392"></a><span class="lineno"> 392</span>  }</div>
|
|
<div class="line"><a name="l00393"></a><span class="lineno"> 393</span>  x += d;</div>
|
|
<div class="line"><a name="l00394"></a><span class="lineno"> 394</span>  }</div>
|
|
<div class="line"><a name="l00395"></a><span class="lineno"> 395</span> }</div>
|
|
<div class="line"><a name="l00396"></a><span class="lineno"> 396</span> </div>
|
|
<div class="line"><a name="l00397"></a><span class="lineno"> 397</span> <span class="comment">/*********************************************************</span></div>
|
|
<div class="line"><a name="l00398"></a><span class="lineno"> 398</span> <span class="comment"> * Optimized distance computations</span></div>
|
|
<div class="line"><a name="l00399"></a><span class="lineno"> 399</span> <span class="comment"> *********************************************************/</span></div>
|
|
<div class="line"><a name="l00400"></a><span class="lineno"> 400</span> </div>
|
|
<div class="line"><a name="l00401"></a><span class="lineno"> 401</span> </div>
|
|
<div class="line"><a name="l00402"></a><span class="lineno"> 402</span> </div>
|
|
<div class="line"><a name="l00403"></a><span class="lineno"> 403</span> <span class="comment">/* Functions to compute:</span></div>
|
|
<div class="line"><a name="l00404"></a><span class="lineno"> 404</span> <span class="comment"> - L2 distance between 2 vectors</span></div>
|
|
<div class="line"><a name="l00405"></a><span class="lineno"> 405</span> <span class="comment"> - inner product between 2 vectors</span></div>
|
|
<div class="line"><a name="l00406"></a><span class="lineno"> 406</span> <span class="comment"> - L2 norm of a vector</span></div>
|
|
<div class="line"><a name="l00407"></a><span class="lineno"> 407</span> <span class="comment"></span></div>
|
|
<div class="line"><a name="l00408"></a><span class="lineno"> 408</span> <span class="comment"> The functions should probably not be invoked when a large number of</span></div>
|
|
<div class="line"><a name="l00409"></a><span class="lineno"> 409</span> <span class="comment"> vectors are to be processed in batch (in which case Matrix multiply</span></div>
|
|
<div class="line"><a name="l00410"></a><span class="lineno"> 410</span> <span class="comment"> is faster), but may be useful for comparing vectors isolated in</span></div>
|
|
<div class="line"><a name="l00411"></a><span class="lineno"> 411</span> <span class="comment"> memory.</span></div>
|
|
<div class="line"><a name="l00412"></a><span class="lineno"> 412</span> <span class="comment"></span></div>
|
|
<div class="line"><a name="l00413"></a><span class="lineno"> 413</span> <span class="comment"> Works with any vectors of any dimension, even unaligned (in which</span></div>
|
|
<div class="line"><a name="l00414"></a><span class="lineno"> 414</span> <span class="comment"> case they are slower).</span></div>
|
|
<div class="line"><a name="l00415"></a><span class="lineno"> 415</span> <span class="comment"></span></div>
|
|
<div class="line"><a name="l00416"></a><span class="lineno"> 416</span> <span class="comment">*/</span></div>
|
|
<div class="line"><a name="l00417"></a><span class="lineno"> 417</span> </div>
|
|
<div class="line"><a name="l00418"></a><span class="lineno"> 418</span> </div>
|
|
<div class="line"><a name="l00419"></a><span class="lineno"> 419</span> <span class="comment">/*********************************************************</span></div>
|
|
<div class="line"><a name="l00420"></a><span class="lineno"> 420</span> <span class="comment"> * Reference implementations</span></div>
|
|
<div class="line"><a name="l00421"></a><span class="lineno"> 421</span> <span class="comment"> */</span></div>
|
|
<div class="line"><a name="l00422"></a><span class="lineno"> 422</span> </div>
|
|
<div class="line"><a name="l00423"></a><span class="lineno"> 423</span> </div>
|
|
<div class="line"><a name="l00424"></a><span class="lineno"> 424</span> </div>
|
|
<div class="line"><a name="l00425"></a><span class="lineno"> 425</span> <span class="comment">/* same without SSE */</span></div>
|
|
<div class="line"><a name="l00426"></a><span class="lineno"> 426</span> <span class="keywordtype">float</span> fvec_L2sqr_ref (<span class="keyword">const</span> <span class="keywordtype">float</span> * x,</div>
|
|
<div class="line"><a name="l00427"></a><span class="lineno"> 427</span>  <span class="keyword">const</span> <span class="keywordtype">float</span> * y,</div>
|
|
<div class="line"><a name="l00428"></a><span class="lineno"> 428</span>  <span class="keywordtype">size_t</span> d)</div>
|
|
<div class="line"><a name="l00429"></a><span class="lineno"> 429</span> {</div>
|
|
<div class="line"><a name="l00430"></a><span class="lineno"> 430</span>  <span class="keywordtype">size_t</span> i;</div>
|
|
<div class="line"><a name="l00431"></a><span class="lineno"> 431</span>  <span class="keywordtype">float</span> res_ = 0;</div>
|
|
<div class="line"><a name="l00432"></a><span class="lineno"> 432</span>  <span class="keywordflow">for</span> (i = 0; i < d; i++) {</div>
|
|
<div class="line"><a name="l00433"></a><span class="lineno"> 433</span>  <span class="keyword">const</span> <span class="keywordtype">float</span> tmp = x[i] - y[i];</div>
|
|
<div class="line"><a name="l00434"></a><span class="lineno"> 434</span>  res_ += tmp * tmp;</div>
|
|
<div class="line"><a name="l00435"></a><span class="lineno"> 435</span>  }</div>
|
|
<div class="line"><a name="l00436"></a><span class="lineno"> 436</span>  <span class="keywordflow">return</span> res_;</div>
|
|
<div class="line"><a name="l00437"></a><span class="lineno"> 437</span> }</div>
|
|
<div class="line"><a name="l00438"></a><span class="lineno"> 438</span> </div>
|
|
<div class="line"><a name="l00439"></a><span class="lineno"> 439</span> <span class="keywordtype">float</span> fvec_inner_product_ref (<span class="keyword">const</span> <span class="keywordtype">float</span> * x,</div>
|
|
<div class="line"><a name="l00440"></a><span class="lineno"> 440</span>  <span class="keyword">const</span> <span class="keywordtype">float</span> * y,</div>
|
|
<div class="line"><a name="l00441"></a><span class="lineno"> 441</span>  <span class="keywordtype">size_t</span> d)</div>
|
|
<div class="line"><a name="l00442"></a><span class="lineno"> 442</span> {</div>
|
|
<div class="line"><a name="l00443"></a><span class="lineno"> 443</span>  <span class="keywordtype">size_t</span> i;</div>
|
|
<div class="line"><a name="l00444"></a><span class="lineno"> 444</span>  <span class="keywordtype">float</span> res_ = 0;</div>
|
|
<div class="line"><a name="l00445"></a><span class="lineno"> 445</span>  <span class="keywordflow">for</span> (i = 0; i < d; i++)</div>
|
|
<div class="line"><a name="l00446"></a><span class="lineno"> 446</span>  res_ += x[i] * y[i];</div>
|
|
<div class="line"><a name="l00447"></a><span class="lineno"> 447</span>  <span class="keywordflow">return</span> res_;</div>
|
|
<div class="line"><a name="l00448"></a><span class="lineno"> 448</span> }</div>
|
|
<div class="line"><a name="l00449"></a><span class="lineno"> 449</span> </div>
|
|
<div class="line"><a name="l00450"></a><span class="lineno"> 450</span> <span class="keywordtype">float</span> fvec_norm_L2sqr_ref (<span class="keyword">const</span> <span class="keywordtype">float</span> * __restrict x,</div>
|
|
<div class="line"><a name="l00451"></a><span class="lineno"> 451</span>  <span class="keywordtype">size_t</span> d)</div>
|
|
<div class="line"><a name="l00452"></a><span class="lineno"> 452</span> {</div>
|
|
<div class="line"><a name="l00453"></a><span class="lineno"> 453</span>  <span class="keywordtype">size_t</span> i;</div>
|
|
<div class="line"><a name="l00454"></a><span class="lineno"> 454</span>  <span class="keywordtype">double</span> res_ = 0;</div>
|
|
<div class="line"><a name="l00455"></a><span class="lineno"> 455</span>  <span class="keywordflow">for</span> (i = 0; i < d; i++)</div>
|
|
<div class="line"><a name="l00456"></a><span class="lineno"> 456</span>  res_ += x[i] * x[i];</div>
|
|
<div class="line"><a name="l00457"></a><span class="lineno"> 457</span>  <span class="keywordflow">return</span> res_;</div>
|
|
<div class="line"><a name="l00458"></a><span class="lineno"> 458</span> }</div>
|
|
<div class="line"><a name="l00459"></a><span class="lineno"> 459</span> </div>
|
|
<div class="line"><a name="l00460"></a><span class="lineno"> 460</span> </div>
|
|
<div class="line"><a name="l00461"></a><span class="lineno"> 461</span> <span class="comment">/*********************************************************</span></div>
|
|
<div class="line"><a name="l00462"></a><span class="lineno"> 462</span> <span class="comment"> * SSE and AVX implementations</span></div>
|
|
<div class="line"><a name="l00463"></a><span class="lineno"> 463</span> <span class="comment"> */</span></div>
|
|
<div class="line"><a name="l00464"></a><span class="lineno"> 464</span> </div>
|
|
<div class="line"><a name="l00465"></a><span class="lineno"> 465</span> <span class="comment">// reads the first d floats (0 &lt;= d &lt; 4) into an __m128, zero-filling the remaining lanes</span></div>
|
|
<div class="line"><a name="l00466"></a><span class="lineno"> 466</span> <span class="keyword">static</span> <span class="keyword">inline</span> __m128 masked_read (<span class="keywordtype">int</span> d, <span class="keyword">const</span> <span class="keywordtype">float</span> *x)</div>
|
|
<div class="line"><a name="l00467"></a><span class="lineno"> 467</span> {</div>
|
|
<div class="line"><a name="l00468"></a><span class="lineno"> 468</span>  assert (0 &lt;= d &amp;&amp; d &lt; 4);</div>
|
|
<div class="line"><a name="l00469"></a><span class="lineno"> 469</span>  __attribute__((__aligned__(16))) float buf[4] = {0, 0, 0, 0};</div>
|
|
<div class="line"><a name="l00470"></a><span class="lineno"> 470</span>  <span class="keywordflow">switch</span> (d) {</div>
|
|
<div class="line"><a name="l00471"></a><span class="lineno"> 471</span>  <span class="keywordflow">case</span> 3:</div>
|
|
<div class="line"><a name="l00472"></a><span class="lineno"> 472</span>  buf[2] = x[2];</div>
|
|
<div class="line"><a name="l00473"></a><span class="lineno"> 473</span>  <span class="keywordflow">case</span> 2:</div>
|
|
<div class="line"><a name="l00474"></a><span class="lineno"> 474</span>  buf[1] = x[1];</div>
|
|
<div class="line"><a name="l00475"></a><span class="lineno"> 475</span>  <span class="keywordflow">case</span> 1:</div>
|
|
<div class="line"><a name="l00476"></a><span class="lineno"> 476</span>  buf[0] = x[0];</div>
|
|
<div class="line"><a name="l00477"></a><span class="lineno"> 477</span>  }</div>
|
|
<div class="line"><a name="l00478"></a><span class="lineno"> 478</span>  <span class="keywordflow">return</span> _mm_load_ps (buf);</div>
|
|
<div class="line"><a name="l00479"></a><span class="lineno"> 479</span>  <span class="comment">// cannot use _mm_mask_set1_epi32: it needs AVX-512 (AVX512F + AVX512VL), not just AVX2</span></div>
|
|
<div class="line"><a name="l00480"></a><span class="lineno"> 480</span> }</div>
|
|
<div class="line"><a name="l00481"></a><span class="lineno"> 481</span> </div>
|
|
<div class="line"><a name="l00482"></a><span class="lineno"> 482</span> <span class="preprocessor">#ifdef USE_AVX</span></div>
|
|
<div class="line"><a name="l00483"></a><span class="lineno"> 483</span> <span class="preprocessor"></span></div>
|
|
<div class="line"><a name="l00484"></a><span class="lineno"> 484</span> <span class="comment">// reads the first d floats (0 &lt;= d &lt; 8) into an __m256, zero-filling the remaining lanes</span></div>
|
|
<div class="line"><a name="l00485"></a><span class="lineno"> 485</span> <span class="keyword">static</span> <span class="keyword">inline</span> __m256 masked_read_8 (<span class="keywordtype">int</span> d, <span class="keyword">const</span> <span class="keywordtype">float</span> *x)</div>
|
|
<div class="line"><a name="l00486"></a><span class="lineno"> 486</span> {</div>
|
|
<div class="line"><a name="l00487"></a><span class="lineno"> 487</span>  assert (0 &lt;= d &amp;&amp; d &lt; 8);</div>
|
|
<div class="line"><a name="l00488"></a><span class="lineno"> 488</span>  <span class="keywordflow">if</span> (d &lt; 4) {</div>
|
|
<div class="line"><a name="l00489"></a><span class="lineno"> 489</span>  __m256 res = _mm256_setzero_ps ();</div>
|
|
<div class="line"><a name="l00490"></a><span class="lineno"> 490</span>  res = _mm256_insertf128_ps (res, masked_read (d, x), 0);</div>
|
|
<div class="line"><a name="l00491"></a><span class="lineno"> 491</span>  <span class="keywordflow">return</span> res;</div>
|
|
<div class="line"><a name="l00492"></a><span class="lineno"> 492</span>  } <span class="keywordflow">else</span> {</div>
|
|
<div class="line"><a name="l00493"></a><span class="lineno"> 493</span>  __m256 res = _mm256_setzero_ps ();</div>
|
|
<div class="line"><a name="l00494"></a><span class="lineno"> 494</span>  res = _mm256_insertf128_ps (res, _mm_loadu_ps (x), 0);</div>
|
|
<div class="line"><a name="l00495"></a><span class="lineno"> 495</span>  res = _mm256_insertf128_ps (res, masked_read (d - 4, x + 4), 1);</div>
|
|
<div class="line"><a name="l00496"></a><span class="lineno"> 496</span>  <span class="keywordflow">return</span> res;</div>
|
|
<div class="line"><a name="l00497"></a><span class="lineno"> 497</span>  }</div>
|
|
<div class="line"><a name="l00498"></a><span class="lineno"> 498</span> }</div>
|
|
<div class="line"><a name="l00499"></a><span class="lineno"> 499</span> </div>
|
|
<div class="line"><a name="l00500"></a><span class="lineno"> 500</span> <span class="keywordtype">float</span> fvec_inner_product (<span class="keyword">const</span> <span class="keywordtype">float</span> * x,</div>
|
|
<div class="line"><a name="l00501"></a><span class="lineno"> 501</span>  <span class="keyword">const</span> <span class="keywordtype">float</span> * y,</div>
|
|
<div class="line"><a name="l00502"></a><span class="lineno"> 502</span>  <span class="keywordtype">size_t</span> d)</div>
|
|
<div class="line"><a name="l00503"></a><span class="lineno"> 503</span> {</div>
|
|
<div class="line"><a name="l00504"></a><span class="lineno"> 504</span>  __m256 msum1 = _mm256_setzero_ps();</div>
|
|
<div class="line"><a name="l00505"></a><span class="lineno"> 505</span> </div>
|
|
<div class="line"><a name="l00506"></a><span class="lineno"> 506</span>  <span class="keywordflow">while</span> (d >= 8) {</div>
|
|
<div class="line"><a name="l00507"></a><span class="lineno"> 507</span>  __m256 mx = _mm256_loadu_ps (x); x += 8;</div>
|
|
<div class="line"><a name="l00508"></a><span class="lineno"> 508</span>  __m256 my = _mm256_loadu_ps (y); y += 8;</div>
|
|
<div class="line"><a name="l00509"></a><span class="lineno"> 509</span>  msum1 = _mm256_add_ps (msum1, _mm256_mul_ps (mx, my));</div>
|
|
<div class="line"><a name="l00510"></a><span class="lineno"> 510</span>  d -= 8;</div>
|
|
<div class="line"><a name="l00511"></a><span class="lineno"> 511</span>  }</div>
|
|
<div class="line"><a name="l00512"></a><span class="lineno"> 512</span> </div>
|
|
<div class="line"><a name="l00513"></a><span class="lineno"> 513</span>  __m128 msum2 = _mm256_extractf128_ps(msum1, 1);</div>
|
|
<div class="line"><a name="l00514"></a><span class="lineno"> 514</span>  msum2 += _mm256_extractf128_ps(msum1, 0);</div>
|
|
<div class="line"><a name="l00515"></a><span class="lineno"> 515</span> </div>
|
|
<div class="line"><a name="l00516"></a><span class="lineno"> 516</span>  <span class="keywordflow">if</span> (d >= 4) {</div>
|
|
<div class="line"><a name="l00517"></a><span class="lineno"> 517</span>  __m128 mx = _mm_loadu_ps (x); x += 4;</div>
|
|
<div class="line"><a name="l00518"></a><span class="lineno"> 518</span>  __m128 my = _mm_loadu_ps (y); y += 4;</div>
|
|
<div class="line"><a name="l00519"></a><span class="lineno"> 519</span>  msum2 = _mm_add_ps (msum2, _mm_mul_ps (mx, my));</div>
|
|
<div class="line"><a name="l00520"></a><span class="lineno"> 520</span>  d -= 4;</div>
|
|
<div class="line"><a name="l00521"></a><span class="lineno"> 521</span>  }</div>
|
|
<div class="line"><a name="l00522"></a><span class="lineno"> 522</span> </div>
|
|
<div class="line"><a name="l00523"></a><span class="lineno"> 523</span>  <span class="keywordflow">if</span> (d > 0) {</div>
|
|
<div class="line"><a name="l00524"></a><span class="lineno"> 524</span>  __m128 mx = masked_read (d, x);</div>
|
|
<div class="line"><a name="l00525"></a><span class="lineno"> 525</span>  __m128 my = masked_read (d, y);</div>
|
|
<div class="line"><a name="l00526"></a><span class="lineno"> 526</span>  msum2 = _mm_add_ps (msum2, _mm_mul_ps (mx, my));</div>
|
|
<div class="line"><a name="l00527"></a><span class="lineno"> 527</span>  }</div>
|
|
<div class="line"><a name="l00528"></a><span class="lineno"> 528</span> </div>
|
|
<div class="line"><a name="l00529"></a><span class="lineno"> 529</span>  msum2 = _mm_hadd_ps (msum2, msum2);</div>
|
|
<div class="line"><a name="l00530"></a><span class="lineno"> 530</span>  msum2 = _mm_hadd_ps (msum2, msum2);</div>
|
|
<div class="line"><a name="l00531"></a><span class="lineno"> 531</span>  <span class="keywordflow">return</span> _mm_cvtss_f32 (msum2);</div>
|
|
<div class="line"><a name="l00532"></a><span class="lineno"> 532</span> }</div>
|
|
<div class="line"><a name="l00533"></a><span class="lineno"> 533</span> </div>
|
|
<div class="line"><a name="l00534"></a><span class="lineno"> 534</span> <span class="keywordtype">float</span> <a class="code" href="namespacefaiss.html#a7466bd32de31640860393a701eaac5ad">fvec_L2sqr</a> (<span class="keyword">const</span> <span class="keywordtype">float</span> * x,</div>
|
|
<div class="line"><a name="l00535"></a><span class="lineno"> 535</span>  <span class="keyword">const</span> <span class="keywordtype">float</span> * y,</div>
|
|
<div class="line"><a name="l00536"></a><span class="lineno"> 536</span>  <span class="keywordtype">size_t</span> d)</div>
|
|
<div class="line"><a name="l00537"></a><span class="lineno"> 537</span> {</div>
|
|
<div class="line"><a name="l00538"></a><span class="lineno"> 538</span>  __m256 msum1 = _mm256_setzero_ps();</div>
|
|
<div class="line"><a name="l00539"></a><span class="lineno"> 539</span> </div>
|
|
<div class="line"><a name="l00540"></a><span class="lineno"> 540</span>  <span class="keywordflow">while</span> (d >= 8) {</div>
|
|
<div class="line"><a name="l00541"></a><span class="lineno"> 541</span>  __m256 mx = _mm256_loadu_ps (x); x += 8;</div>
|
|
<div class="line"><a name="l00542"></a><span class="lineno"> 542</span>  __m256 my = _mm256_loadu_ps (y); y += 8;</div>
|
|
<div class="line"><a name="l00543"></a><span class="lineno"> 543</span>  <span class="keyword">const</span> __m256 a_m_b1 = mx - my;</div>
|
|
<div class="line"><a name="l00544"></a><span class="lineno"> 544</span>  msum1 += a_m_b1 * a_m_b1;</div>
|
|
<div class="line"><a name="l00545"></a><span class="lineno"> 545</span>  d -= 8;</div>
|
|
<div class="line"><a name="l00546"></a><span class="lineno"> 546</span>  }</div>
|
|
<div class="line"><a name="l00547"></a><span class="lineno"> 547</span> </div>
|
|
<div class="line"><a name="l00548"></a><span class="lineno"> 548</span>  __m128 msum2 = _mm256_extractf128_ps(msum1, 1);</div>
|
|
<div class="line"><a name="l00549"></a><span class="lineno"> 549</span>  msum2 += _mm256_extractf128_ps(msum1, 0);</div>
|
|
<div class="line"><a name="l00550"></a><span class="lineno"> 550</span> </div>
|
|
<div class="line"><a name="l00551"></a><span class="lineno"> 551</span>  <span class="keywordflow">if</span> (d >= 4) {</div>
|
|
<div class="line"><a name="l00552"></a><span class="lineno"> 552</span>  __m128 mx = _mm_loadu_ps (x); x += 4;</div>
|
|
<div class="line"><a name="l00553"></a><span class="lineno"> 553</span>  __m128 my = _mm_loadu_ps (y); y += 4;</div>
|
|
<div class="line"><a name="l00554"></a><span class="lineno"> 554</span>  <span class="keyword">const</span> __m128 a_m_b1 = mx - my;</div>
|
|
<div class="line"><a name="l00555"></a><span class="lineno"> 555</span>  msum2 += a_m_b1 * a_m_b1;</div>
|
|
<div class="line"><a name="l00556"></a><span class="lineno"> 556</span>  d -= 4;</div>
|
|
<div class="line"><a name="l00557"></a><span class="lineno"> 557</span>  }</div>
|
|
<div class="line"><a name="l00558"></a><span class="lineno"> 558</span> </div>
|
|
<div class="line"><a name="l00559"></a><span class="lineno"> 559</span>  <span class="keywordflow">if</span> (d > 0) {</div>
|
|
<div class="line"><a name="l00560"></a><span class="lineno"> 560</span>  __m128 mx = masked_read (d, x);</div>
|
|
<div class="line"><a name="l00561"></a><span class="lineno"> 561</span>  __m128 my = masked_read (d, y);</div>
|
|
<div class="line"><a name="l00562"></a><span class="lineno"> 562</span>  __m128 a_m_b1 = mx - my;</div>
|
|
<div class="line"><a name="l00563"></a><span class="lineno"> 563</span>  msum2 += a_m_b1 * a_m_b1;</div>
|
|
<div class="line"><a name="l00564"></a><span class="lineno"> 564</span>  }</div>
|
|
<div class="line"><a name="l00565"></a><span class="lineno"> 565</span> </div>
|
|
<div class="line"><a name="l00566"></a><span class="lineno"> 566</span>  msum2 = _mm_hadd_ps (msum2, msum2);</div>
|
|
<div class="line"><a name="l00567"></a><span class="lineno"> 567</span>  msum2 = _mm_hadd_ps (msum2, msum2);</div>
|
|
<div class="line"><a name="l00568"></a><span class="lineno"> 568</span>  <span class="keywordflow">return</span> _mm_cvtss_f32 (msum2);</div>
|
|
<div class="line"><a name="l00569"></a><span class="lineno"> 569</span> }</div>
|
|
<div class="line"><a name="l00570"></a><span class="lineno"> 570</span> </div>
|
|
<div class="line"><a name="l00571"></a><span class="lineno"> 571</span> <span class="preprocessor">#else</span></div>
|
|
<div class="line"><a name="l00572"></a><span class="lineno"> 572</span> <span class="preprocessor"></span></div>
|
|
<div class="line"><a name="l00573"></a><span class="lineno"> 573</span> <span class="comment">/* SSE implementation of the squared L2 distance */</span></div>
|
|
<div class="line"><a name="l00574"></a><span class="lineno"><a class="line" href="namespacefaiss.html#a7466bd32de31640860393a701eaac5ad"> 574</a></span> <span class="keywordtype">float</span> <a class="code" href="namespacefaiss.html#a7466bd32de31640860393a701eaac5ad">fvec_L2sqr</a> (<span class="keyword">const</span> <span class="keywordtype">float</span> * x,</div>
|
|
<div class="line"><a name="l00575"></a><span class="lineno"> 575</span>  <span class="keyword">const</span> <span class="keywordtype">float</span> * y,</div>
|
|
<div class="line"><a name="l00576"></a><span class="lineno"> 576</span>  <span class="keywordtype">size_t</span> d)</div>
|
|
<div class="line"><a name="l00577"></a><span class="lineno"> 577</span> {</div>
|
|
<div class="line"><a name="l00578"></a><span class="lineno"> 578</span>  __m128 msum1 = _mm_setzero_ps();</div>
|
|
<div class="line"><a name="l00579"></a><span class="lineno"> 579</span> </div>
|
|
<div class="line"><a name="l00580"></a><span class="lineno"> 580</span>  <span class="keywordflow">while</span> (d >= 4) {</div>
|
|
<div class="line"><a name="l00581"></a><span class="lineno"> 581</span>  __m128 mx = _mm_loadu_ps (x); x += 4;</div>
|
|
<div class="line"><a name="l00582"></a><span class="lineno"> 582</span>  __m128 my = _mm_loadu_ps (y); y += 4;</div>
|
|
<div class="line"><a name="l00583"></a><span class="lineno"> 583</span>  <span class="keyword">const</span> __m128 a_m_b1 = mx - my;</div>
|
|
<div class="line"><a name="l00584"></a><span class="lineno"> 584</span>  msum1 += a_m_b1 * a_m_b1;</div>
|
|
<div class="line"><a name="l00585"></a><span class="lineno"> 585</span>  d -= 4;</div>
|
|
<div class="line"><a name="l00586"></a><span class="lineno"> 586</span>  }</div>
|
|
<div class="line"><a name="l00587"></a><span class="lineno"> 587</span> </div>
|
|
<div class="line"><a name="l00588"></a><span class="lineno"> 588</span>  <span class="keywordflow">if</span> (d > 0) {</div>
|
|
<div class="line"><a name="l00589"></a><span class="lineno"> 589</span>  <span class="comment">// add the last 1, 2 or 3 values</span></div>
|
|
<div class="line"><a name="l00590"></a><span class="lineno"> 590</span>  __m128 mx = masked_read (d, x);</div>
|
|
<div class="line"><a name="l00591"></a><span class="lineno"> 591</span>  __m128 my = masked_read (d, y);</div>
|
|
<div class="line"><a name="l00592"></a><span class="lineno"> 592</span>  __m128 a_m_b1 = mx - my;</div>
|
|
<div class="line"><a name="l00593"></a><span class="lineno"> 593</span>  msum1 += a_m_b1 * a_m_b1;</div>
|
|
<div class="line"><a name="l00594"></a><span class="lineno"> 594</span>  }</div>
|
|
<div class="line"><a name="l00595"></a><span class="lineno"> 595</span> </div>
|
|
<div class="line"><a name="l00596"></a><span class="lineno"> 596</span>  msum1 = _mm_hadd_ps (msum1, msum1);</div>
|
|
<div class="line"><a name="l00597"></a><span class="lineno"> 597</span>  msum1 = _mm_hadd_ps (msum1, msum1);</div>
|
|
<div class="line"><a name="l00598"></a><span class="lineno"> 598</span>  <span class="keywordflow">return</span> _mm_cvtss_f32 (msum1);</div>
|
|
<div class="line"><a name="l00599"></a><span class="lineno"> 599</span> }</div>
|
|
<div class="line"><a name="l00600"></a><span class="lineno"> 600</span> </div>
|
|
<div class="line"><a name="l00601"></a><span class="lineno"> 601</span> </div>
|
|
<div class="line"><a name="l00602"></a><span class="lineno"> 602</span> <span class="keywordtype">float</span> fvec_inner_product (<span class="keyword">const</span> <span class="keywordtype">float</span> * x,</div>
|
|
<div class="line"><a name="l00603"></a><span class="lineno"> 603</span>  <span class="keyword">const</span> <span class="keywordtype">float</span> * y,</div>
|
|
<div class="line"><a name="l00604"></a><span class="lineno"> 604</span>  <span class="keywordtype">size_t</span> d)</div>
|
|
<div class="line"><a name="l00605"></a><span class="lineno"> 605</span> {</div>
|
|
<div class="line"><a name="l00606"></a><span class="lineno"> 606</span>  __m128 mx, my;</div>
|
|
<div class="line"><a name="l00607"></a><span class="lineno"> 607</span>  __m128 msum1 = _mm_setzero_ps();</div>
|
|
<div class="line"><a name="l00608"></a><span class="lineno"> 608</span> </div>
|
|
<div class="line"><a name="l00609"></a><span class="lineno"> 609</span>  <span class="keywordflow">while</span> (d >= 4) {</div>
|
|
<div class="line"><a name="l00610"></a><span class="lineno"> 610</span>  mx = _mm_loadu_ps (x); x += 4;</div>
|
|
<div class="line"><a name="l00611"></a><span class="lineno"> 611</span>  my = _mm_loadu_ps (y); y += 4;</div>
|
|
<div class="line"><a name="l00612"></a><span class="lineno"> 612</span>  msum1 = _mm_add_ps (msum1, _mm_mul_ps (mx, my));</div>
|
|
<div class="line"><a name="l00613"></a><span class="lineno"> 613</span>  d -= 4;</div>
|
|
<div class="line"><a name="l00614"></a><span class="lineno"> 614</span>  }</div>
|
|
<div class="line"><a name="l00615"></a><span class="lineno"> 615</span> </div>
|
|
<div class="line"><a name="l00616"></a><span class="lineno"> 616</span>  <span class="comment">// add the last 1, 2, or 3 values</span></div>
|
|
<div class="line"><a name="l00617"></a><span class="lineno"> 617</span>  mx = masked_read (d, x);</div>
|
|
<div class="line"><a name="l00618"></a><span class="lineno"> 618</span>  my = masked_read (d, y);</div>
|
|
<div class="line"><a name="l00619"></a><span class="lineno"> 619</span>  __m128 prod = _mm_mul_ps (mx, my);</div>
|
|
<div class="line"><a name="l00620"></a><span class="lineno"> 620</span> </div>
|
|
<div class="line"><a name="l00621"></a><span class="lineno"> 621</span>  msum1 = _mm_add_ps (msum1, prod);</div>
|
|
<div class="line"><a name="l00622"></a><span class="lineno"> 622</span> </div>
|
|
<div class="line"><a name="l00623"></a><span class="lineno"> 623</span>  msum1 = _mm_hadd_ps (msum1, msum1);</div>
|
|
<div class="line"><a name="l00624"></a><span class="lineno"> 624</span>  msum1 = _mm_hadd_ps (msum1, msum1);</div>
|
|
<div class="line"><a name="l00625"></a><span class="lineno"> 625</span>  <span class="keywordflow">return</span> _mm_cvtss_f32 (msum1);</div>
|
|
<div class="line"><a name="l00626"></a><span class="lineno"> 626</span> }</div>
|
|
<div class="line"><a name="l00627"></a><span class="lineno"> 627</span> </div>
|
|
<div class="line"><a name="l00628"></a><span class="lineno"> 628</span> </div>
|
|
<div class="line"><a name="l00629"></a><span class="lineno"> 629</span> </div>
|
|
<div class="line"><a name="l00630"></a><span class="lineno"> 630</span> <span class="preprocessor">#endif</span></div>
|
|
<div class="line"><a name="l00631"></a><span class="lineno"> 631</span> <span class="preprocessor"></span></div>
|
|
<div class="line"><a name="l00632"></a><span class="lineno"><a class="line" href="namespacefaiss.html#a7a49180ebf10e643217bbce5862c7f84"> 632</a></span> <span class="keywordtype">float</span> <a class="code" href="namespacefaiss.html#a7a49180ebf10e643217bbce5862c7f84">fvec_norm_L2sqr</a> (<span class="keyword">const</span> <span class="keywordtype">float</span> * x,</div>
|
|
<div class="line"><a name="l00633"></a><span class="lineno"> 633</span>  <span class="keywordtype">size_t</span> d)</div>
|
|
<div class="line"><a name="l00634"></a><span class="lineno"> 634</span> {</div>
|
|
<div class="line"><a name="l00635"></a><span class="lineno"> 635</span>  __m128 mx;</div>
|
|
<div class="line"><a name="l00636"></a><span class="lineno"> 636</span>  __m128 msum1 = _mm_setzero_ps();</div>
|
|
<div class="line"><a name="l00637"></a><span class="lineno"> 637</span> </div>
|
|
<div class="line"><a name="l00638"></a><span class="lineno"> 638</span>  <span class="keywordflow">while</span> (d >= 4) {</div>
|
|
<div class="line"><a name="l00639"></a><span class="lineno"> 639</span>  mx = _mm_loadu_ps (x); x += 4;</div>
|
|
<div class="line"><a name="l00640"></a><span class="lineno"> 640</span>  msum1 = _mm_add_ps (msum1, _mm_mul_ps (mx, mx));</div>
|
|
<div class="line"><a name="l00641"></a><span class="lineno"> 641</span>  d -= 4;</div>
|
|
<div class="line"><a name="l00642"></a><span class="lineno"> 642</span>  }</div>
|
|
<div class="line"><a name="l00643"></a><span class="lineno"> 643</span> </div>
|
|
<div class="line"><a name="l00644"></a><span class="lineno"> 644</span>  mx = masked_read (d, x);</div>
|
|
<div class="line"><a name="l00645"></a><span class="lineno"> 645</span>  msum1 = _mm_add_ps (msum1, _mm_mul_ps (mx, mx));</div>
|
|
<div class="line"><a name="l00646"></a><span class="lineno"> 646</span> </div>
|
|
<div class="line"><a name="l00647"></a><span class="lineno"> 647</span>  msum1 = _mm_hadd_ps (msum1, msum1);</div>
|
|
<div class="line"><a name="l00648"></a><span class="lineno"> 648</span>  msum1 = _mm_hadd_ps (msum1, msum1);</div>
|
|
<div class="line"><a name="l00649"></a><span class="lineno"> 649</span>  <span class="keywordflow">return</span> _mm_cvtss_f32 (msum1);</div>
|
|
<div class="line"><a name="l00650"></a><span class="lineno"> 650</span> }</div>
|
|
<div class="line"><a name="l00651"></a><span class="lineno"> 651</span> </div>
|
|
<div class="line"><a name="l00652"></a><span class="lineno"> 652</span> </div>
|
|
<div class="line"><a name="l00653"></a><span class="lineno"> 653</span> </div>
|
|
<div class="line"><a name="l00654"></a><span class="lineno"> 654</span> </div>
|
|
<div class="line"><a name="l00655"></a><span class="lineno"> 655</span> <span class="comment">/***************************************************************************</span></div>
|
|
<div class="line"><a name="l00656"></a><span class="lineno"> 656</span> <span class="comment"> * Matrix/vector ops</span></div>
|
|
<div class="line"><a name="l00657"></a><span class="lineno"> 657</span> <span class="comment"> ***************************************************************************/</span></div>
|
|
<div class="line"><a name="l00658"></a><span class="lineno"> 658</span> </div>
|
|
<div class="line"><a name="l00659"></a><span class="lineno"> 659</span> </div>
|
|
<div class="line"><a name="l00660"></a><span class="lineno"> 660</span> </div>
|
|
<div class="line"><a name="l00661"></a><span class="lineno"> 661</span> <span class="comment">/* Compute the inner product between a vector x and</span></div>
|
|
<div class="line"><a name="l00662"></a><span class="lineno"> 662</span> <span class="comment"> a set of ny vectors y.</span></div>
|
|
<div class="line"><a name="l00663"></a><span class="lineno"> 663</span> <span class="comment"> These functions are not intended to replace BLAS matrix-matrix products, as they</span></div>
|
|
<div class="line"><a name="l00664"></a><span class="lineno"> 664</span> <span class="comment"> would be significantly less efficient in this case. */</span></div>
|
|
<div class="line"><a name="l00665"></a><span class="lineno"> 665</span> <span class="keywordtype">void</span> fvec_inner_products_ny (<span class="keywordtype">float</span> * __restrict ip,</div>
|
|
<div class="line"><a name="l00666"></a><span class="lineno"> 666</span>  <span class="keyword">const</span> <span class="keywordtype">float</span> * x,</div>
|
|
<div class="line"><a name="l00667"></a><span class="lineno"> 667</span>  <span class="keyword">const</span> <span class="keywordtype">float</span> * y,</div>
|
|
<div class="line"><a name="l00668"></a><span class="lineno"> 668</span>  <span class="keywordtype">size_t</span> d, <span class="keywordtype">size_t</span> ny)</div>
|
|
<div class="line"><a name="l00669"></a><span class="lineno"> 669</span> {</div>
|
|
<div class="line"><a name="l00670"></a><span class="lineno"> 670</span>  <span class="keywordflow">for</span> (<span class="keywordtype">size_t</span> i = 0; i &lt; ny; i++) {</div>
|
|
<div class="line"><a name="l00671"></a><span class="lineno"> 671</span>  ip[i] = fvec_inner_product (x, y, d);</div>
|
|
<div class="line"><a name="l00672"></a><span class="lineno"> 672</span>  y += d;</div>
|
|
<div class="line"><a name="l00673"></a><span class="lineno"> 673</span>  }</div>
|
|
<div class="line"><a name="l00674"></a><span class="lineno"> 674</span> }</div>
|
|
<div class="line"><a name="l00675"></a><span class="lineno"> 675</span> </div>
|
|
<div class="line"><a name="l00676"></a><span class="lineno"> 676</span> </div>
|
|
<div class="line"><a name="l00677"></a><span class="lineno"> 677</span> </div>
|
|
<div class="line"><a name="l00678"></a><span class="lineno"> 678</span> </div>
|
|
<div class="line"><a name="l00679"></a><span class="lineno"> 679</span> <span class="comment">/* compute ny squared L2 distances between x and a set of vectors y */</span></div>
|
|
<div class="line"><a name="l00680"></a><span class="lineno"> 680</span> <span class="keywordtype">void</span> fvec_L2sqr_ny (<span class="keywordtype">float</span> * __restrict dis,</div>
|
|
<div class="line"><a name="l00681"></a><span class="lineno"> 681</span>  <span class="keyword">const</span> <span class="keywordtype">float</span> * x,</div>
|
|
<div class="line"><a name="l00682"></a><span class="lineno"> 682</span>  <span class="keyword">const</span> <span class="keywordtype">float</span> * y,</div>
|
|
<div class="line"><a name="l00683"></a><span class="lineno"> 683</span>  <span class="keywordtype">size_t</span> d, <span class="keywordtype">size_t</span> ny)</div>
|
|
<div class="line"><a name="l00684"></a><span class="lineno"> 684</span> {</div>
|
|
<div class="line"><a name="l00685"></a><span class="lineno"> 685</span>  <span class="keywordflow">for</span> (<span class="keywordtype">size_t</span> i = 0; i &lt; ny; i++) {</div>
|
|
<div class="line"><a name="l00686"></a><span class="lineno"> 686</span>  dis[i] = <a class="code" href="namespacefaiss.html#a7466bd32de31640860393a701eaac5ad">fvec_L2sqr</a> (x, y, d);</div>
|
|
<div class="line"><a name="l00687"></a><span class="lineno"> 687</span>  y += d;</div>
|
|
<div class="line"><a name="l00688"></a><span class="lineno"> 688</span>  }</div>
|
|
<div class="line"><a name="l00689"></a><span class="lineno"> 689</span> }</div>
|
|
<div class="line"><a name="l00690"></a><span class="lineno"> 690</span> </div>
|
|
<div class="line"><a name="l00691"></a><span class="lineno"> 691</span> </div>
|
|
<div class="line"><a name="l00692"></a><span class="lineno"> 692</span> </div>
|
|
<div class="line"><a name="l00693"></a><span class="lineno"> 693</span> </div>
|
|
<div class="line"><a name="l00694"></a><span class="lineno"> 694</span> <span class="comment">/* Compute the L2 norm of a set of nx vectors */</span></div>
|
|
<div class="line"><a name="l00695"></a><span class="lineno"> 695</span> <span class="keywordtype">void</span> fvec_norms_L2 (<span class="keywordtype">float</span> * __restrict nr,</div>
|
|
<div class="line"><a name="l00696"></a><span class="lineno"> 696</span>  <span class="keyword">const</span> <span class="keywordtype">float</span> * __restrict x,</div>
|
|
<div class="line"><a name="l00697"></a><span class="lineno"> 697</span>  <span class="keywordtype">size_t</span> d, <span class="keywordtype">size_t</span> nx)</div>
|
|
<div class="line"><a name="l00698"></a><span class="lineno"> 698</span> {</div>
|
|
<div class="line"><a name="l00699"></a><span class="lineno"> 699</span> </div>
|
|
<div class="line"><a name="l00700"></a><span class="lineno"> 700</span> <span class="preprocessor">#pragma omp parallel for</span></div>
|
|
<div class="line"><a name="l00701"></a><span class="lineno"> 701</span> <span class="preprocessor"></span> <span class="keywordflow">for</span> (<span class="keywordtype">size_t</span> i = 0; i &lt; nx; i++) {</div>
|
|
<div class="line"><a name="l00702"></a><span class="lineno"> 702</span>  nr[i] = sqrtf (<a class="code" href="namespacefaiss.html#a7a49180ebf10e643217bbce5862c7f84">fvec_norm_L2sqr</a> (x + i * d, d));</div>
|
|
<div class="line"><a name="l00703"></a><span class="lineno"> 703</span>  }</div>
|
|
<div class="line"><a name="l00704"></a><span class="lineno"> 704</span> }</div>
|
|
<div class="line"><a name="l00705"></a><span class="lineno"> 705</span> </div>
|
|
<div class="line"><a name="l00706"></a><span class="lineno"> 706</span> <span class="keywordtype">void</span> fvec_norms_L2sqr (<span class="keywordtype">float</span> * __restrict nr,</div>
|
|
<div class="line"><a name="l00707"></a><span class="lineno"> 707</span>  <span class="keyword">const</span> <span class="keywordtype">float</span> * __restrict x,</div>
|
|
<div class="line"><a name="l00708"></a><span class="lineno"> 708</span>  <span class="keywordtype">size_t</span> d, <span class="keywordtype">size_t</span> nx)</div>
|
|
<div class="line"><a name="l00709"></a><span class="lineno"> 709</span> {</div>
|
|
<div class="line"><a name="l00710"></a><span class="lineno"> 710</span> <span class="preprocessor">#pragma omp parallel for</span></div>
|
|
<div class="line"><a name="l00711"></a><span class="lineno"> 711</span> <span class="preprocessor"></span> <span class="keywordflow">for</span> (<span class="keywordtype">size_t</span> i = 0; i &lt; nx; i++)</div>
|
|
<div class="line"><a name="l00712"></a><span class="lineno"> 712</span>  nr[i] = <a class="code" href="namespacefaiss.html#a7a49180ebf10e643217bbce5862c7f84">fvec_norm_L2sqr</a> (x + i * d, d);</div>
|
|
<div class="line"><a name="l00713"></a><span class="lineno"> 713</span> }</div>
|
|
<div class="line"><a name="l00714"></a><span class="lineno"> 714</span> </div>
|
|
<div class="line"><a name="l00715"></a><span class="lineno"> 715</span> </div>
|
|
<div class="line"><a name="l00716"></a><span class="lineno"> 716</span> </div>
|
|
<div class="line"><a name="l00717"></a><span class="lineno"> 717</span> <span class="keywordtype">void</span> fvec_renorm_L2 (<span class="keywordtype">size_t</span> d, <span class="keywordtype">size_t</span> nx, <span class="keywordtype">float</span> * __restrict x)</div>
|
|
<div class="line"><a name="l00718"></a><span class="lineno"> 718</span> {</div>
|
|
<div class="line"><a name="l00719"></a><span class="lineno"> 719</span> <span class="preprocessor">#pragma omp parallel for</span></div>
|
|
<div class="line"><a name="l00720"></a><span class="lineno"> 720</span> <span class="preprocessor"></span> <span class="keywordflow">for</span> (<span class="keywordtype">size_t</span> i = 0; i &lt; nx; i++) {</div>
|
|
<div class="line"><a name="l00721"></a><span class="lineno"> 721</span>  <span class="keywordtype">float</span> * __restrict xi = x + i * d;</div>
|
|
<div class="line"><a name="l00722"></a><span class="lineno"> 722</span> </div>
|
|
<div class="line"><a name="l00723"></a><span class="lineno"> 723</span>  <span class="keywordtype">float</span> nr = <a class="code" href="namespacefaiss.html#a7a49180ebf10e643217bbce5862c7f84">fvec_norm_L2sqr</a> (xi, d);</div>
|
|
<div class="line"><a name="l00724"></a><span class="lineno"> 724</span> </div>
|
|
<div class="line"><a name="l00725"></a><span class="lineno"> 725</span>  <span class="keywordflow">if</span> (nr > 0) {</div>
|
|
<div class="line"><a name="l00726"></a><span class="lineno"> 726</span>  <span class="keywordtype">size_t</span> j;</div>
|
|
<div class="line"><a name="l00727"></a><span class="lineno"> 727</span>  <span class="keyword">const</span> <span class="keywordtype">float</span> inv_nr = 1.0 / sqrtf (nr);</div>
|
|
<div class="line"><a name="l00728"></a><span class="lineno"> 728</span>  <span class="keywordflow">for</span> (j = 0; j < d; j++)</div>
|
|
<div class="line"><a name="l00729"></a><span class="lineno"> 729</span>  xi[j] *= inv_nr;</div>
|
|
<div class="line"><a name="l00730"></a><span class="lineno"> 730</span>  }</div>
|
|
<div class="line"><a name="l00731"></a><span class="lineno"> 731</span>  }</div>
|
|
<div class="line"><a name="l00732"></a><span class="lineno"> 732</span> }</div>
|
|
<div class="line"><a name="l00733"></a><span class="lineno"> 733</span> </div>
|
|
<div class="line"><a name="l00734"></a><span class="lineno"> 734</span> </div>
|
|
<div class="line"><a name="l00735"></a><span class="lineno"> 735</span> </div>
|
|
<div class="line"><a name="l00736"></a><span class="lineno"> 736</span> </div>
|
|
<div class="line"><a name="l00737"></a><span class="lineno"> 737</span> </div>
|
|
<div class="line"><a name="l00738"></a><span class="lineno"> 738</span> </div>
|
|
<div class="line"><a name="l00739"></a><span class="lineno"> 739</span> </div>
|
|
<div class="line"><a name="l00740"></a><span class="lineno"> 740</span> </div>
|
|
<div class="line"><a name="l00741"></a><span class="lineno"> 741</span> </div>
|
|
<div class="line"><a name="l00742"></a><span class="lineno"> 742</span> </div>
|
|
<div class="line"><a name="l00743"></a><span class="lineno"> 743</span> </div>
|
|
<div class="line"><a name="l00744"></a><span class="lineno"> 744</span> </div>
|
|
<div class="line"><a name="l00745"></a><span class="lineno"> 745</span> </div>
|
|
<div class="line"><a name="l00746"></a><span class="lineno"> 746</span> </div>
|
|
<div class="line"><a name="l00747"></a><span class="lineno"> 747</span> </div>
|
|
<div class="line"><a name="l00748"></a><span class="lineno"> 748</span> </div>
|
|
<div class="line"><a name="l00749"></a><span class="lineno"> 749</span> </div>
|
|
<div class="line"><a name="l00750"></a><span class="lineno"> 750</span> <span class="comment">/***************************************************************************</span></div>
|
|
<div class="line"><a name="l00751"></a><span class="lineno"> 751</span> <span class="comment"> * KNN functions</span></div>
|
|
<div class="line"><a name="l00752"></a><span class="lineno"> 752</span> <span class="comment"> ***************************************************************************/</span></div>
|
|
<div class="line"><a name="l00753"></a><span class="lineno"> 753</span> </div>
|
|
<div class="line"><a name="l00754"></a><span class="lineno"> 754</span> </div>
|
|
<div class="line"><a name="l00755"></a><span class="lineno"> 755</span> </div>
|
|
<div class="line"><a name="l00756"></a><span class="lineno"> 756</span> <span class="comment">/* Find the nearest neighbors for nx queries in a set of ny vectors */</span></div>
|
|
<div class="line"><a name="l00757"></a><span class="lineno"> 757</span> <span class="keyword">static</span> <span class="keywordtype">void</span> knn_inner_product_sse (<span class="keyword">const</span> <span class="keywordtype">float</span> * x,</div>
|
|
<div class="line"><a name="l00758"></a><span class="lineno"> 758</span>  <span class="keyword">const</span> <span class="keywordtype">float</span> * y,</div>
|
|
<div class="line"><a name="l00759"></a><span class="lineno"> 759</span>  <span class="keywordtype">size_t</span> d, <span class="keywordtype">size_t</span> nx, <span class="keywordtype">size_t</span> ny,</div>
|
|
<div class="line"><a name="l00760"></a><span class="lineno"> 760</span>  float_minheap_array_t * res)</div>
|
|
<div class="line"><a name="l00761"></a><span class="lineno"> 761</span> {</div>
|
|
<div class="line"><a name="l00762"></a><span class="lineno"> 762</span>  <span class="keywordtype">size_t</span> k = res->k;</div>
|
|
<div class="line"><a name="l00763"></a><span class="lineno"> 763</span> </div>
|
|
<div class="line"><a name="l00764"></a><span class="lineno"> 764</span> <span class="preprocessor">#pragma omp parallel for</span></div>
|
|
<div class="line"><a name="l00765"></a><span class="lineno"> 765</span> <span class="preprocessor"></span> <span class="keywordflow">for</span> (<span class="keywordtype">size_t</span> i = 0; i < nx; i++) {</div>
|
|
<div class="line"><a name="l00766"></a><span class="lineno"> 766</span>  <span class="keyword">const</span> <span class="keywordtype">float</span> * x_ = x + i * d;</div>
|
|
<div class="line"><a name="l00767"></a><span class="lineno"> 767</span>  <span class="keyword">const</span> <span class="keywordtype">float</span> * y_ = y;</div>
|
|
<div class="line"><a name="l00768"></a><span class="lineno"> 768</span> </div>
|
|
<div class="line"><a name="l00769"></a><span class="lineno"> 769</span>  <span class="keywordtype">float</span> * __restrict simi = res->get_val(i);</div>
|
|
<div class="line"><a name="l00770"></a><span class="lineno"> 770</span>  <span class="keywordtype">long</span> * __restrict idxi = res->get_ids (i);</div>
|
|
<div class="line"><a name="l00771"></a><span class="lineno"> 771</span> </div>
|
|
<div class="line"><a name="l00772"></a><span class="lineno"> 772</span>  minheap_heapify (k, simi, idxi);</div>
|
|
<div class="line"><a name="l00773"></a><span class="lineno"> 773</span> </div>
|
|
<div class="line"><a name="l00774"></a><span class="lineno"> 774</span>  <span class="keywordflow">for</span> (<span class="keywordtype">size_t</span> j = 0; j < ny; j++) {</div>
|
|
<div class="line"><a name="l00775"></a><span class="lineno"> 775</span>  <span class="keywordtype">float</span> ip = fvec_inner_product (x_, y_, d);</div>
|
|
<div class="line"><a name="l00776"></a><span class="lineno"> 776</span> </div>
|
|
<div class="line"><a name="l00777"></a><span class="lineno"> 777</span>  <span class="keywordflow">if</span> (ip > simi[0]) {</div>
|
|
<div class="line"><a name="l00778"></a><span class="lineno"> 778</span>  minheap_pop (k, simi, idxi);</div>
|
|
<div class="line"><a name="l00779"></a><span class="lineno"> 779</span>  minheap_push (k, simi, idxi, ip, j);</div>
|
|
<div class="line"><a name="l00780"></a><span class="lineno"> 780</span>  }</div>
|
|
<div class="line"><a name="l00781"></a><span class="lineno"> 781</span>  y_ += d;</div>
|
|
<div class="line"><a name="l00782"></a><span class="lineno"> 782</span>  }</div>
|
|
<div class="line"><a name="l00783"></a><span class="lineno"> 783</span>  minheap_reorder (k, simi, idxi);</div>
|
|
<div class="line"><a name="l00784"></a><span class="lineno"> 784</span>  }</div>
|
|
<div class="line"><a name="l00785"></a><span class="lineno"> 785</span> </div>
|
|
<div class="line"><a name="l00786"></a><span class="lineno"> 786</span> }</div>
|
|
<div class="line"><a name="l00787"></a><span class="lineno"> 787</span> </div>
|
|
<div class="line"><a name="l00788"></a><span class="lineno"> 788</span> <span class="keyword">static</span> <span class="keywordtype">void</span> knn_L2sqr_sse (</div>
|
|
<div class="line"><a name="l00789"></a><span class="lineno"> 789</span>  <span class="keyword">const</span> <span class="keywordtype">float</span> * x,</div>
|
|
<div class="line"><a name="l00790"></a><span class="lineno"> 790</span>  <span class="keyword">const</span> <span class="keywordtype">float</span> * y,</div>
|
|
<div class="line"><a name="l00791"></a><span class="lineno"> 791</span>  <span class="keywordtype">size_t</span> d, <span class="keywordtype">size_t</span> nx, <span class="keywordtype">size_t</span> ny,</div>
|
|
<div class="line"><a name="l00792"></a><span class="lineno"> 792</span>  float_maxheap_array_t * res)</div>
|
|
<div class="line"><a name="l00793"></a><span class="lineno"> 793</span> {</div>
|
|
<div class="line"><a name="l00794"></a><span class="lineno"> 794</span>  <span class="keywordtype">size_t</span> k = res->k;</div>
|
|
<div class="line"><a name="l00795"></a><span class="lineno"> 795</span> </div>
|
|
<div class="line"><a name="l00796"></a><span class="lineno"> 796</span> <span class="preprocessor">#pragma omp parallel for</span></div>
|
|
<div class="line"><a name="l00797"></a><span class="lineno"> 797</span> <span class="preprocessor"></span> <span class="keywordflow">for</span> (<span class="keywordtype">size_t</span> i = 0; i < nx; i++) {</div>
|
|
<div class="line"><a name="l00798"></a><span class="lineno"> 798</span>  <span class="keyword">const</span> <span class="keywordtype">float</span> * x_ = x + i * d;</div>
|
|
<div class="line"><a name="l00799"></a><span class="lineno"> 799</span>  <span class="keyword">const</span> <span class="keywordtype">float</span> * y_ = y;</div>
|
|
<div class="line"><a name="l00800"></a><span class="lineno"> 800</span>  <span class="keywordtype">size_t</span> j;</div>
|
|
<div class="line"><a name="l00801"></a><span class="lineno"> 801</span>  <span class="keywordtype">float</span> * __restrict simi = res->get_val(i);</div>
|
|
<div class="line"><a name="l00802"></a><span class="lineno"> 802</span>  <span class="keywordtype">long</span> * __restrict idxi = res->get_ids (i);</div>
|
|
<div class="line"><a name="l00803"></a><span class="lineno"> 803</span> </div>
|
|
<div class="line"><a name="l00804"></a><span class="lineno"> 804</span>  maxheap_heapify (k, simi, idxi);</div>
|
|
<div class="line"><a name="l00805"></a><span class="lineno"> 805</span>  <span class="keywordflow">for</span> (j = 0; j < ny; j++) {</div>
|
|
<div class="line"><a name="l00806"></a><span class="lineno"> 806</span>  <span class="keywordtype">float</span> disij = <a class="code" href="namespacefaiss.html#a7466bd32de31640860393a701eaac5ad">fvec_L2sqr</a> (x_, y_, d);</div>
|
|
<div class="line"><a name="l00807"></a><span class="lineno"> 807</span> </div>
|
|
<div class="line"><a name="l00808"></a><span class="lineno"> 808</span>  <span class="keywordflow">if</span> (disij < simi[0]) {</div>
|
|
<div class="line"><a name="l00809"></a><span class="lineno"> 809</span>  maxheap_pop (k, simi, idxi);</div>
|
|
<div class="line"><a name="l00810"></a><span class="lineno"> 810</span>  maxheap_push (k, simi, idxi, disij, j);</div>
|
|
<div class="line"><a name="l00811"></a><span class="lineno"> 811</span>  }</div>
|
|
<div class="line"><a name="l00812"></a><span class="lineno"> 812</span>  y_ += d;</div>
|
|
<div class="line"><a name="l00813"></a><span class="lineno"> 813</span>  }</div>
|
|
<div class="line"><a name="l00814"></a><span class="lineno"> 814</span>  maxheap_reorder (k, simi, idxi);</div>
|
|
<div class="line"><a name="l00815"></a><span class="lineno"> 815</span>  }</div>
|
|
<div class="line"><a name="l00816"></a><span class="lineno"> 816</span> </div>
|
|
<div class="line"><a name="l00817"></a><span class="lineno"> 817</span> }</div>
|
|
<div class="line"><a name="l00818"></a><span class="lineno"> 818</span> </div>
|
|
<div class="line"><a name="l00819"></a><span class="lineno"> 819</span> <span class="comment"></span></div>
|
|
<div class="line"><a name="l00820"></a><span class="lineno"> 820</span> <span class="comment">/** Find the nearest neighbors for nx queries in a set of ny vectors */</span></div>
|
|
<div class="line"><a name="l00821"></a><span class="lineno"> 821</span> <span class="keyword">static</span> <span class="keywordtype">void</span> knn_inner_product_blas (</div>
|
|
<div class="line"><a name="l00822"></a><span class="lineno"> 822</span>  <span class="keyword">const</span> <span class="keywordtype">float</span> * x,</div>
|
|
<div class="line"><a name="l00823"></a><span class="lineno"> 823</span>  <span class="keyword">const</span> <span class="keywordtype">float</span> * y,</div>
|
|
<div class="line"><a name="l00824"></a><span class="lineno"> 824</span>  <span class="keywordtype">size_t</span> d, <span class="keywordtype">size_t</span> nx, <span class="keywordtype">size_t</span> ny,</div>
|
|
<div class="line"><a name="l00825"></a><span class="lineno"> 825</span>  float_minheap_array_t * res)</div>
|
|
<div class="line"><a name="l00826"></a><span class="lineno"> 826</span> {</div>
|
|
<div class="line"><a name="l00827"></a><span class="lineno"> 827</span>  res->heapify ();</div>
|
|
<div class="line"><a name="l00828"></a><span class="lineno"> 828</span> </div>
|
|
<div class="line"><a name="l00829"></a><span class="lineno"> 829</span>  <span class="comment">// BLAS does not like empty matrices</span></div>
|
|
<div class="line"><a name="l00830"></a><span class="lineno"> 830</span>  <span class="keywordflow">if</span> (nx == 0 || ny == 0) <span class="keywordflow">return</span>;</div>
|
|
<div class="line"><a name="l00831"></a><span class="lineno"> 831</span> </div>
|
|
<div class="line"><a name="l00832"></a><span class="lineno"> 832</span>  <span class="comment">/* block sizes */</span></div>
|
|
<div class="line"><a name="l00833"></a><span class="lineno"> 833</span>  <span class="keyword">const</span> <span class="keywordtype">size_t</span> bs_x = 4096, bs_y = 1024;</div>
|
|
<div class="line"><a name="l00834"></a><span class="lineno"> 834</span>  <span class="comment">// const size_t bs_x = 16, bs_y = 16;</span></div>
|
|
<div class="line"><a name="l00835"></a><span class="lineno"> 835</span>  <span class="keywordtype">float</span> *ip_block = <span class="keyword">new</span> <span class="keywordtype">float</span>[bs_x * bs_y];</div>
|
|
<div class="line"><a name="l00836"></a><span class="lineno"> 836</span> </div>
|
|
<div class="line"><a name="l00837"></a><span class="lineno"> 837</span>  <span class="keywordflow">for</span> (<span class="keywordtype">size_t</span> i0 = 0; i0 < nx; i0 += bs_x) {</div>
|
|
<div class="line"><a name="l00838"></a><span class="lineno"> 838</span>  <span class="keywordtype">size_t</span> i1 = i0 + bs_x;</div>
|
|
<div class="line"><a name="l00839"></a><span class="lineno"> 839</span>  <span class="keywordflow">if</span>(i1 > nx) i1 = nx;</div>
|
|
<div class="line"><a name="l00840"></a><span class="lineno"> 840</span> </div>
|
|
<div class="line"><a name="l00841"></a><span class="lineno"> 841</span>  <span class="keywordflow">for</span> (<span class="keywordtype">size_t</span> j0 = 0; j0 < ny; j0 += bs_y) {</div>
|
|
<div class="line"><a name="l00842"></a><span class="lineno"> 842</span>  <span class="keywordtype">size_t</span> j1 = j0 + bs_y;</div>
|
|
<div class="line"><a name="l00843"></a><span class="lineno"> 843</span>  <span class="keywordflow">if</span> (j1 > ny) j1 = ny;</div>
|
|
<div class="line"><a name="l00844"></a><span class="lineno"> 844</span>  <span class="comment">/* compute the actual dot products */</span></div>
|
|
<div class="line"><a name="l00845"></a><span class="lineno"> 845</span>  {</div>
|
|
<div class="line"><a name="l00846"></a><span class="lineno"> 846</span>  <span class="keywordtype">float</span> one = 1, zero = 0;</div>
|
|
<div class="line"><a name="l00847"></a><span class="lineno"> 847</span>  FINTEGER nyi = j1 - j0, nxi = i1 - i0, di = d;</div>
|
|
<div class="line"><a name="l00848"></a><span class="lineno"> 848</span>  sgemm_ (<span class="stringliteral">"Transpose"</span>, <span class="stringliteral">"Not transpose"</span>, &nyi, &nxi, &di, &one,</div>
|
|
<div class="line"><a name="l00849"></a><span class="lineno"> 849</span>  y + j0 * d, &di,</div>
|
|
<div class="line"><a name="l00850"></a><span class="lineno"> 850</span>  x + i0 * d, &di, &zero,</div>
|
|
<div class="line"><a name="l00851"></a><span class="lineno"> 851</span>  ip_block, &nyi);</div>
|
|
<div class="line"><a name="l00852"></a><span class="lineno"> 852</span>  }</div>
|
|
<div class="line"><a name="l00853"></a><span class="lineno"> 853</span> </div>
|
|
<div class="line"><a name="l00854"></a><span class="lineno"> 854</span>  <span class="comment">/* collect maxima */</span></div>
|
|
<div class="line"><a name="l00855"></a><span class="lineno"> 855</span>  res->addn (j1 - j0, ip_block, j0, i0, i1 - i0);</div>
|
|
<div class="line"><a name="l00856"></a><span class="lineno"> 856</span>  }</div>
|
|
<div class="line"><a name="l00857"></a><span class="lineno"> 857</span>  }</div>
|
|
<div class="line"><a name="l00858"></a><span class="lineno"> 858</span>  <span class="keyword">delete</span> [] ip_block;</div>
|
|
<div class="line"><a name="l00859"></a><span class="lineno"> 859</span>  res->reorder ();</div>
|
|
<div class="line"><a name="l00860"></a><span class="lineno"> 860</span> }</div>
|
|
<div class="line"><a name="l00861"></a><span class="lineno"> 861</span> </div>
|
|
<div class="line"><a name="l00862"></a><span class="lineno"> 862</span> <span class="comment">// distance correction is an operator that can be applied to transform</span></div>
|
|
<div class="line"><a name="l00863"></a><span class="lineno"> 863</span> <span class="comment">// the distances</span></div>
|
|
<div class="line"><a name="l00864"></a><span class="lineno"> 864</span> <span class="keyword">template</span><<span class="keyword">class</span> DistanceCorrection></div>
|
|
<div class="line"><a name="l00865"></a><span class="lineno"> 865</span> <span class="keyword">static</span> <span class="keywordtype">void</span> knn_L2sqr_blas (<span class="keyword">const</span> <span class="keywordtype">float</span> * x,</div>
|
|
<div class="line"><a name="l00866"></a><span class="lineno"> 866</span>  <span class="keyword">const</span> <span class="keywordtype">float</span> * y,</div>
|
|
<div class="line"><a name="l00867"></a><span class="lineno"> 867</span>  <span class="keywordtype">size_t</span> d, <span class="keywordtype">size_t</span> nx, <span class="keywordtype">size_t</span> ny,</div>
|
|
<div class="line"><a name="l00868"></a><span class="lineno"> 868</span>  float_maxheap_array_t * res,</div>
|
|
<div class="line"><a name="l00869"></a><span class="lineno"> 869</span>  <span class="keyword">const</span> DistanceCorrection &corr)</div>
|
|
<div class="line"><a name="l00870"></a><span class="lineno"> 870</span> {</div>
|
|
<div class="line"><a name="l00871"></a><span class="lineno"> 871</span>  res->heapify ();</div>
|
|
<div class="line"><a name="l00872"></a><span class="lineno"> 872</span> </div>
|
|
<div class="line"><a name="l00873"></a><span class="lineno"> 873</span>  <span class="comment">// BLAS does not like empty matrices</span></div>
|
|
<div class="line"><a name="l00874"></a><span class="lineno"> 874</span>  <span class="keywordflow">if</span> (nx == 0 || ny == 0) <span class="keywordflow">return</span>;</div>
|
|
<div class="line"><a name="l00875"></a><span class="lineno"> 875</span> </div>
|
|
<div class="line"><a name="l00876"></a><span class="lineno"> 876</span>  <span class="keywordtype">size_t</span> k = res->k;</div>
|
|
<div class="line"><a name="l00877"></a><span class="lineno"> 877</span> </div>
|
|
<div class="line"><a name="l00878"></a><span class="lineno"> 878</span>  <span class="comment">/* block sizes */</span></div>
|
|
<div class="line"><a name="l00879"></a><span class="lineno"> 879</span>  <span class="keyword">const</span> <span class="keywordtype">size_t</span> bs_x = 4096, bs_y = 1024;</div>
|
|
<div class="line"><a name="l00880"></a><span class="lineno"> 880</span>  <span class="comment">// const size_t bs_x = 16, bs_y = 16;</span></div>
|
|
<div class="line"><a name="l00881"></a><span class="lineno"> 881</span>  <span class="keywordtype">float</span> *ip_block = <span class="keyword">new</span> <span class="keywordtype">float</span>[bs_x * bs_y];</div>
|
|
<div class="line"><a name="l00882"></a><span class="lineno"> 882</span> </div>
|
|
<div class="line"><a name="l00883"></a><span class="lineno"> 883</span>  <span class="keywordtype">float</span> *x_norms = <span class="keyword">new</span> <span class="keywordtype">float</span>[nx];</div>
|
|
<div class="line"><a name="l00884"></a><span class="lineno"> 884</span>  fvec_norms_L2sqr (x_norms, x, d, nx);</div>
|
|
<div class="line"><a name="l00885"></a><span class="lineno"> 885</span> </div>
|
|
<div class="line"><a name="l00886"></a><span class="lineno"> 886</span>  <span class="keywordtype">float</span> *y_norms = <span class="keyword">new</span> <span class="keywordtype">float</span>[ny];</div>
|
|
<div class="line"><a name="l00887"></a><span class="lineno"> 887</span>  fvec_norms_L2sqr (y_norms, y, d, ny);</div>
|
|
<div class="line"><a name="l00888"></a><span class="lineno"> 888</span> </div>
|
|
<div class="line"><a name="l00889"></a><span class="lineno"> 889</span>  <span class="keywordflow">for</span> (<span class="keywordtype">size_t</span> i0 = 0; i0 < nx; i0 += bs_x) {</div>
|
|
<div class="line"><a name="l00890"></a><span class="lineno"> 890</span>  <span class="keywordtype">size_t</span> i1 = i0 + bs_x;</div>
|
|
<div class="line"><a name="l00891"></a><span class="lineno"> 891</span>  <span class="keywordflow">if</span>(i1 > nx) i1 = nx;</div>
|
|
<div class="line"><a name="l00892"></a><span class="lineno"> 892</span> </div>
|
|
<div class="line"><a name="l00893"></a><span class="lineno"> 893</span>  <span class="keywordflow">for</span> (<span class="keywordtype">size_t</span> j0 = 0; j0 < ny; j0 += bs_y) {</div>
|
|
<div class="line"><a name="l00894"></a><span class="lineno"> 894</span>  <span class="keywordtype">size_t</span> j1 = j0 + bs_y;</div>
|
|
<div class="line"><a name="l00895"></a><span class="lineno"> 895</span>  <span class="keywordflow">if</span> (j1 > ny) j1 = ny;</div>
|
|
<div class="line"><a name="l00896"></a><span class="lineno"> 896</span>  <span class="comment">/* compute the actual dot products */</span></div>
|
|
<div class="line"><a name="l00897"></a><span class="lineno"> 897</span>  {</div>
|
|
<div class="line"><a name="l00898"></a><span class="lineno"> 898</span>  <span class="keywordtype">float</span> one = 1, zero = 0;</div>
|
|
<div class="line"><a name="l00899"></a><span class="lineno"> 899</span>  FINTEGER nyi = j1 - j0, nxi = i1 - i0, di = d;</div>
|
|
<div class="line"><a name="l00900"></a><span class="lineno"> 900</span>  sgemm_ (<span class="stringliteral">"Transpose"</span>, <span class="stringliteral">"Not transpose"</span>, &nyi, &nxi, &di, &one,</div>
|
|
<div class="line"><a name="l00901"></a><span class="lineno"> 901</span>  y + j0 * d, &di,</div>
|
|
<div class="line"><a name="l00902"></a><span class="lineno"> 902</span>  x + i0 * d, &di, &zero,</div>
|
|
<div class="line"><a name="l00903"></a><span class="lineno"> 903</span>  ip_block, &nyi);</div>
|
|
<div class="line"><a name="l00904"></a><span class="lineno"> 904</span>  }</div>
|
|
<div class="line"><a name="l00905"></a><span class="lineno"> 905</span> </div>
|
|
<div class="line"><a name="l00906"></a><span class="lineno"> 906</span>  <span class="comment">/* collect minima */</span></div>
|
|
<div class="line"><a name="l00907"></a><span class="lineno"> 907</span> <span class="preprocessor">#pragma omp parallel for</span></div>
|
|
<div class="line"><a name="l00908"></a><span class="lineno"> 908</span> <span class="preprocessor"></span> <span class="keywordflow">for</span> (<span class="keywordtype">size_t</span> i = i0; i < i1; i++) {</div>
|
|
<div class="line"><a name="l00909"></a><span class="lineno"> 909</span>  <span class="keywordtype">float</span> * __restrict simi = res->get_val(i);</div>
|
|
<div class="line"><a name="l00910"></a><span class="lineno"> 910</span>  <span class="keywordtype">long</span> * __restrict idxi = res->get_ids (i);</div>
|
|
<div class="line"><a name="l00911"></a><span class="lineno"> 911</span>  <span class="keyword">const</span> <span class="keywordtype">float</span> *ip_line = ip_block + (i - i0) * (j1 - j0);</div>
|
|
<div class="line"><a name="l00912"></a><span class="lineno"> 912</span> </div>
|
|
<div class="line"><a name="l00913"></a><span class="lineno"> 913</span>  <span class="keywordflow">for</span> (<span class="keywordtype">size_t</span> j = j0; j < j1; j++) {</div>
|
|
<div class="line"><a name="l00914"></a><span class="lineno"> 914</span>  <span class="keywordtype">float</span> ip = *ip_line++;</div>
|
|
<div class="line"><a name="l00915"></a><span class="lineno"> 915</span>  <span class="keywordtype">float</span> dis = x_norms[i] + y_norms[j] - 2 * ip;</div>
|
|
<div class="line"><a name="l00916"></a><span class="lineno"> 916</span> </div>
|
|
<div class="line"><a name="l00917"></a><span class="lineno"> 917</span>  dis = corr (dis, i, j);</div>
|
|
<div class="line"><a name="l00918"></a><span class="lineno"> 918</span> </div>
|
|
<div class="line"><a name="l00919"></a><span class="lineno"> 919</span>  <span class="keywordflow">if</span> (dis < simi[0]) {</div>
|
|
<div class="line"><a name="l00920"></a><span class="lineno"> 920</span>  maxheap_pop (k, simi, idxi);</div>
|
|
<div class="line"><a name="l00921"></a><span class="lineno"> 921</span>  maxheap_push (k, simi, idxi, dis, j);</div>
|
|
<div class="line"><a name="l00922"></a><span class="lineno"> 922</span>  }</div>
|
|
<div class="line"><a name="l00923"></a><span class="lineno"> 923</span>  }</div>
|
|
<div class="line"><a name="l00924"></a><span class="lineno"> 924</span>  }</div>
|
|
<div class="line"><a name="l00925"></a><span class="lineno"> 925</span>  }</div>
|
|
<div class="line"><a name="l00926"></a><span class="lineno"> 926</span>  }</div>
|
|
<div class="line"><a name="l00927"></a><span class="lineno"> 927</span>  res->reorder ();</div>
|
|
<div class="line"><a name="l00928"></a><span class="lineno"> 928</span> </div>
|
|
<div class="line"><a name="l00929"></a><span class="lineno"> 929</span>  <span class="keyword">delete</span> [] ip_block;</div>
|
|
<div class="line"><a name="l00930"></a><span class="lineno"> 930</span>  <span class="keyword">delete</span> [] x_norms;</div>
|
|
<div class="line"><a name="l00931"></a><span class="lineno"> 931</span>  <span class="keyword">delete</span> [] y_norms;</div>
|
|
<div class="line"><a name="l00932"></a><span class="lineno"> 932</span> }</div>
|
|
<div class="line"><a name="l00933"></a><span class="lineno"> 933</span> </div>
|
|
<div class="line"><a name="l00934"></a><span class="lineno"> 934</span> </div>
|
|
<div class="line"><a name="l00935"></a><span class="lineno"> 935</span> </div>
|
|
<div class="line"><a name="l00936"></a><span class="lineno"> 936</span> </div>
|
|
<div class="line"><a name="l00937"></a><span class="lineno"> 937</span> </div>
|
|
<div class="line"><a name="l00938"></a><span class="lineno"> 938</span> </div>
|
|
<div class="line"><a name="l00939"></a><span class="lineno"> 939</span> </div>
|
|
<div class="line"><a name="l00940"></a><span class="lineno"> 940</span> </div>
|
|
<div class="line"><a name="l00941"></a><span class="lineno"> 941</span> </div>
|
|
<div class="line"><a name="l00942"></a><span class="lineno"> 942</span> <span class="comment">/*******************************************************</span></div>
|
|
<div class="line"><a name="l00943"></a><span class="lineno"> 943</span> <span class="comment"> * KNN driver functions</span></div>
|
|
<div class="line"><a name="l00944"></a><span class="lineno"> 944</span> <span class="comment"> *******************************************************/</span></div>
|
|
<div class="line"><a name="l00945"></a><span class="lineno"> 945</span> </div>
|
|
<div class="line"><a name="l00946"></a><span class="lineno"><a class="line" href="namespacefaiss.html#a880c7318971f866267a86945aaa61b17"> 946</a></span> <span class="keywordtype">void</span> <a class="code" href="namespacefaiss.html#a880c7318971f866267a86945aaa61b17">knn_inner_product</a> (<span class="keyword">const</span> <span class="keywordtype">float</span> * x,</div>
|
|
<div class="line"><a name="l00947"></a><span class="lineno"> 947</span>  <span class="keyword">const</span> <span class="keywordtype">float</span> * y,</div>
|
|
<div class="line"><a name="l00948"></a><span class="lineno"> 948</span>  <span class="keywordtype">size_t</span> d, <span class="keywordtype">size_t</span> nx, <span class="keywordtype">size_t</span> ny,</div>
|
|
<div class="line"><a name="l00949"></a><span class="lineno"> 949</span>  <a class="code" href="structfaiss_1_1HeapArray.html">float_minheap_array_t</a> * res)</div>
|
|
<div class="line"><a name="l00950"></a><span class="lineno"> 950</span> {</div>
|
|
<div class="line"><a name="l00951"></a><span class="lineno"> 951</span>  <span class="keywordflow">if</span> (d % 4 == 0 && nx < 20) {</div>
|
|
<div class="line"><a name="l00952"></a><span class="lineno"> 952</span>  knn_inner_product_sse (x, y, d, nx, ny, res);</div>
|
|
<div class="line"><a name="l00953"></a><span class="lineno"> 953</span>  } <span class="keywordflow">else</span> {</div>
|
|
<div class="line"><a name="l00954"></a><span class="lineno"> 954</span>  knn_inner_product_blas (x, y, d, nx, ny, res);</div>
|
|
<div class="line"><a name="l00955"></a><span class="lineno"> 955</span>  }</div>
|
|
<div class="line"><a name="l00956"></a><span class="lineno"> 956</span> }</div>
|
|
<div class="line"><a name="l00957"></a><span class="lineno"> 957</span> </div>
|
|
<div class="line"><a name="l00958"></a><span class="lineno"> 958</span> </div>
|
|
<div class="line"><a name="l00959"></a><span class="lineno"> 959</span> </div>
|
|
<div class="line"><a name="l00960"></a><span class="lineno"><a class="line" href="structfaiss_1_1NopDistanceCorrection.html"> 960</a></span> <span class="keyword">struct </span><a class="code" href="structfaiss_1_1NopDistanceCorrection.html">NopDistanceCorrection</a> {</div>
|
|
<div class="line"><a name="l00961"></a><span class="lineno"> 961</span>  <span class="keywordtype">float</span> operator()(<span class="keywordtype">float</span> dis, <span class="keywordtype">size_t</span> <span class="comment">/*qno*/</span>, <span class="keywordtype">size_t</span> <span class="comment">/*bno*/</span>)<span class="keyword"> const </span>{</div>
|
|
<div class="line"><a name="l00962"></a><span class="lineno"> 962</span>  <span class="keywordflow">return</span> dis;</div>
|
|
<div class="line"><a name="l00963"></a><span class="lineno"> 963</span>  }</div>
|
|
<div class="line"><a name="l00964"></a><span class="lineno"> 964</span> };</div>
|
|
<div class="line"><a name="l00965"></a><span class="lineno"> 965</span> </div>
|
|
<div class="line"><a name="l00966"></a><span class="lineno"><a class="line" href="namespacefaiss.html#a2f803e3d3b07cfab63699c89de161237"> 966</a></span> <span class="keywordtype">void</span> <a class="code" href="namespacefaiss.html#a2f803e3d3b07cfab63699c89de161237">knn_L2sqr</a> (<span class="keyword">const</span> <span class="keywordtype">float</span> * x,</div>
|
|
<div class="line"><a name="l00967"></a><span class="lineno"> 967</span>  <span class="keyword">const</span> <span class="keywordtype">float</span> * y,</div>
|
|
<div class="line"><a name="l00968"></a><span class="lineno"> 968</span>  <span class="keywordtype">size_t</span> d, <span class="keywordtype">size_t</span> nx, <span class="keywordtype">size_t</span> ny,</div>
|
|
<div class="line"><a name="l00969"></a><span class="lineno"> 969</span>  <a class="code" href="structfaiss_1_1HeapArray.html">float_maxheap_array_t</a> * res)</div>
|
|
<div class="line"><a name="l00970"></a><span class="lineno"> 970</span> {</div>
|
|
<div class="line"><a name="l00971"></a><span class="lineno"> 971</span>  <span class="keywordflow">if</span> (d % 4 == 0 && nx < 20) {</div>
|
|
<div class="line"><a name="l00972"></a><span class="lineno"> 972</span>  knn_L2sqr_sse (x, y, d, nx, ny, res);</div>
|
|
<div class="line"><a name="l00973"></a><span class="lineno"> 973</span>  } <span class="keywordflow">else</span> {</div>
|
|
<div class="line"><a name="l00974"></a><span class="lineno"> 974</span>  <a class="code" href="structfaiss_1_1NopDistanceCorrection.html">NopDistanceCorrection</a> nop;</div>
|
|
<div class="line"><a name="l00975"></a><span class="lineno"> 975</span>  knn_L2sqr_blas (x, y, d, nx, ny, res, nop);</div>
|
|
<div class="line"><a name="l00976"></a><span class="lineno"> 976</span>  }</div>
|
|
<div class="line"><a name="l00977"></a><span class="lineno"> 977</span> }</div>
|
|
<div class="line"><a name="l00978"></a><span class="lineno"> 978</span> </div>
|
|
<div class="line"><a name="l00979"></a><span class="lineno"><a class="line" href="structfaiss_1_1BaseShiftDistanceCorrection.html"> 979</a></span> <span class="keyword">struct </span><a class="code" href="structfaiss_1_1BaseShiftDistanceCorrection.html">BaseShiftDistanceCorrection</a> {</div>
|
|
<div class="line"><a name="l00980"></a><span class="lineno"> 980</span>  <span class="keyword">const</span> <span class="keywordtype">float</span> *base_shift;</div>
|
|
<div class="line"><a name="l00981"></a><span class="lineno"> 981</span>  <span class="keywordtype">float</span> operator()(<span class="keywordtype">float</span> dis, <span class="keywordtype">size_t</span> <span class="comment">/*qno*/</span>, <span class="keywordtype">size_t</span> bno)<span class="keyword"> const </span>{</div>
|
|
<div class="line"><a name="l00982"></a><span class="lineno"> 982</span>  <span class="keywordflow">return</span> dis - base_shift[bno];</div>
|
|
<div class="line"><a name="l00983"></a><span class="lineno"> 983</span>  }</div>
|
|
<div class="line"><a name="l00984"></a><span class="lineno"> 984</span> };</div>
|
|
<div class="line"><a name="l00985"></a><span class="lineno"> 985</span> </div>
|
|
<div class="line"><a name="l00986"></a><span class="lineno"><a class="line" href="namespacefaiss.html#a5eb1701e46123827966f2a56da893d1d"> 986</a></span> <span class="keywordtype">void</span> <a class="code" href="namespacefaiss.html#a5eb1701e46123827966f2a56da893d1d">knn_L2sqr_base_shift</a> (</div>
|
|
<div class="line"><a name="l00987"></a><span class="lineno"> 987</span>  <span class="keyword">const</span> <span class="keywordtype">float</span> * x,</div>
|
|
<div class="line"><a name="l00988"></a><span class="lineno"> 988</span>  <span class="keyword">const</span> <span class="keywordtype">float</span> * y,</div>
|
|
<div class="line"><a name="l00989"></a><span class="lineno"> 989</span>  <span class="keywordtype">size_t</span> d, <span class="keywordtype">size_t</span> nx, <span class="keywordtype">size_t</span> ny,</div>
|
|
<div class="line"><a name="l00990"></a><span class="lineno"> 990</span>  <a class="code" href="structfaiss_1_1HeapArray.html">float_maxheap_array_t</a> * res,</div>
|
|
<div class="line"><a name="l00991"></a><span class="lineno"> 991</span>  <span class="keyword">const</span> <span class="keywordtype">float</span> *base_shift)</div>
|
|
<div class="line"><a name="l00992"></a><span class="lineno"> 992</span> {</div>
|
|
<div class="line"><a name="l00993"></a><span class="lineno"> 993</span>  <a class="code" href="structfaiss_1_1BaseShiftDistanceCorrection.html">BaseShiftDistanceCorrection</a> corr = {base_shift};</div>
|
|
<div class="line"><a name="l00994"></a><span class="lineno"> 994</span>  knn_L2sqr_blas (x, y, d, nx, ny, res, corr);</div>
|
|
<div class="line"><a name="l00995"></a><span class="lineno"> 995</span> }</div>
|
|
<div class="line"><a name="l00996"></a><span class="lineno"> 996</span> </div>
|
|
<div class="line"><a name="l00997"></a><span class="lineno"> 997</span> </div>
|
|
<div class="line"><a name="l00998"></a><span class="lineno"> 998</span> </div>
|
|
<div class="line"><a name="l00999"></a><span class="lineno"> 999</span> <span class="comment">/***************************************************************************</span></div>
|
|
<div class="line"><a name="l01000"></a><span class="lineno"> 1000</span> <span class="comment"> * compute a subset of distances</span></div>
|
|
<div class="line"><a name="l01001"></a><span class="lineno"> 1001</span> <span class="comment"> ***************************************************************************/</span></div>
|
|
<div class="line"><a name="l01002"></a><span class="lineno"> 1002</span> </div>
|
|
<div class="line"><a name="l01003"></a><span class="lineno"> 1003</span> <span class="comment">/* compute the inner product between x and a subset y of ny vectors,</span></div>
|
|
<div class="line"><a name="l01004"></a><span class="lineno"> 1004</span> <span class="comment"> whose indices are given by ids. */</span></div>
|
|
<div class="line"><a name="l01005"></a><span class="lineno"> 1005</span> <span class="keywordtype">void</span> fvec_inner_products_by_idx (<span class="keywordtype">float</span> * __restrict ip,</div>
|
|
<div class="line"><a name="l01006"></a><span class="lineno"> 1006</span>  <span class="keyword">const</span> <span class="keywordtype">float</span> * x,</div>
|
|
<div class="line"><a name="l01007"></a><span class="lineno"> 1007</span>  <span class="keyword">const</span> <span class="keywordtype">float</span> * y,</div>
|
|
<div class="line"><a name="l01008"></a><span class="lineno"> 1008</span>  <span class="keyword">const</span> <span class="keywordtype">long</span> * __restrict ids, <span class="comment">/* for y vecs */</span></div>
|
|
<div class="line"><a name="l01009"></a><span class="lineno"> 1009</span>  <span class="keywordtype">size_t</span> d, <span class="keywordtype">size_t</span> nx, <span class="keywordtype">size_t</span> ny)</div>
|
|
<div class="line"><a name="l01010"></a><span class="lineno"> 1010</span> {</div>
|
|
<div class="line"><a name="l01011"></a><span class="lineno"> 1011</span> <span class="preprocessor">#pragma omp parallel for</span></div>
|
|
<div class="line"><a name="l01012"></a><span class="lineno"> 1012</span> <span class="preprocessor"></span> <span class="keywordflow">for</span> (<span class="keywordtype">size_t</span> j = 0; j < nx; j++) {</div>
|
|
<div class="line"><a name="l01013"></a><span class="lineno"> 1013</span>  <span class="keyword">const</span> <span class="keywordtype">long</span> * __restrict idsj = ids + j * ny;</div>
|
|
<div class="line"><a name="l01014"></a><span class="lineno"> 1014</span>  <span class="keyword">const</span> <span class="keywordtype">float</span> * xj = x + j * d;</div>
|
|
<div class="line"><a name="l01015"></a><span class="lineno"> 1015</span>  <span class="keywordtype">float</span> * __restrict ipj = ip + j * ny;</div>
|
|
<div class="line"><a name="l01016"></a><span class="lineno"> 1016</span>  <span class="keywordflow">for</span> (<span class="keywordtype">size_t</span> i = 0; i < ny; i++) {</div>
|
|
<div class="line"><a name="l01017"></a><span class="lineno"> 1017</span>  <span class="keywordflow">if</span> (idsj[i] < 0)</div>
|
|
<div class="line"><a name="l01018"></a><span class="lineno"> 1018</span>  <span class="keywordflow">continue</span>;</div>
|
|
<div class="line"><a name="l01019"></a><span class="lineno"> 1019</span>  ipj[i] = fvec_inner_product (xj, y + d * idsj[i], d);</div>
|
|
<div class="line"><a name="l01020"></a><span class="lineno"> 1020</span>  }</div>
|
|
<div class="line"><a name="l01021"></a><span class="lineno"> 1021</span>  }</div>
|
|
<div class="line"><a name="l01022"></a><span class="lineno"> 1022</span> }</div>
|
|
<div class="line"><a name="l01023"></a><span class="lineno"> 1023</span> </div>
|
|
<div class="line"><a name="l01024"></a><span class="lineno"> 1024</span> <span class="comment">/* compute the squared L2 distance between x and a subset y of ny vectors,</span></div>
|
|
<div class="line"><a name="l01025"></a><span class="lineno"> 1025</span> <span class="comment"> whose indices are given by ids. */</span></div>
|
|
<div class="line"><a name="l01026"></a><span class="lineno"> 1026</span> <span class="keywordtype">void</span> fvec_L2sqr_by_idx (<span class="keywordtype">float</span> * __restrict dis,</div>
|
|
<div class="line"><a name="l01027"></a><span class="lineno"> 1027</span>  <span class="keyword">const</span> <span class="keywordtype">float</span> * x,</div>
|
|
<div class="line"><a name="l01028"></a><span class="lineno"> 1028</span>  <span class="keyword">const</span> <span class="keywordtype">float</span> * y,</div>
|
|
<div class="line"><a name="l01029"></a><span class="lineno"> 1029</span>  <span class="keyword">const</span> <span class="keywordtype">long</span> * __restrict ids, <span class="comment">/* ids of y vecs */</span></div>
|
|
<div class="line"><a name="l01030"></a><span class="lineno"> 1030</span>  <span class="keywordtype">size_t</span> d, <span class="keywordtype">size_t</span> nx, <span class="keywordtype">size_t</span> ny)</div>
|
|
<div class="line"><a name="l01031"></a><span class="lineno"> 1031</span> {</div>
|
|
<div class="line"><a name="l01032"></a><span class="lineno"> 1032</span> <span class="preprocessor">#pragma omp parallel for</span></div>
|
|
<div class="line"><a name="l01033"></a><span class="lineno"> 1033</span> <span class="preprocessor"></span> <span class="keywordflow">for</span> (<span class="keywordtype">size_t</span> j = 0; j < nx; j++) {</div>
|
|
<div class="line"><a name="l01034"></a><span class="lineno"> 1034</span>  <span class="keyword">const</span> <span class="keywordtype">long</span> * __restrict idsj = ids + j * ny;</div>
|
|
<div class="line"><a name="l01035"></a><span class="lineno"> 1035</span>  <span class="keyword">const</span> <span class="keywordtype">float</span> * xj = x + j * d;</div>
|
|
<div class="line"><a name="l01036"></a><span class="lineno"> 1036</span>  <span class="keywordtype">float</span> * __restrict disj = dis + j * ny;</div>
|
|
<div class="line"><a name="l01037"></a><span class="lineno"> 1037</span>  <span class="keywordflow">for</span> (<span class="keywordtype">size_t</span> i = 0; i < ny; i++) {</div>
|
|
<div class="line"><a name="l01038"></a><span class="lineno"> 1038</span>  <span class="keywordflow">if</span> (idsj[i] < 0)</div>
|
|
<div class="line"><a name="l01039"></a><span class="lineno"> 1039</span>  <span class="keywordflow">continue</span>;</div>
|
|
<div class="line"><a name="l01040"></a><span class="lineno"> 1040</span>  disj[i] = <a class="code" href="namespacefaiss.html#a7466bd32de31640860393a701eaac5ad">fvec_L2sqr</a> (xj, y + d * idsj[i], d);</div>
|
|
<div class="line"><a name="l01041"></a><span class="lineno"> 1041</span>  }</div>
|
|
<div class="line"><a name="l01042"></a><span class="lineno"> 1042</span>  }</div>
|
|
<div class="line"><a name="l01043"></a><span class="lineno"> 1043</span> }</div>
|
|
<div class="line"><a name="l01044"></a><span class="lineno"> 1044</span> </div>
|
|
<div class="line"><a name="l01045"></a><span class="lineno"> 1045</span> </div>
|
|
<div class="line"><a name="l01046"></a><span class="lineno"> 1046</span> </div>
|
|
<div class="line"><a name="l01047"></a><span class="lineno"> 1047</span> </div>
|
|
<div class="line"><a name="l01048"></a><span class="lineno"> 1048</span> </div>
|
|
<div class="line"><a name="l01049"></a><span class="lineno"> 1049</span> <span class="comment">/* Find the nearest neighbors for nx queries in a set of ny vectors</span></div>
|
|
<div class="line"><a name="l01050"></a><span class="lineno"> 1050</span> <span class="comment"> indexed by ids. May be useful for re-ranking a pre-selected vector list */</span></div>
|
|
<div class="line"><a name="l01051"></a><span class="lineno"> 1051</span> <span class="keywordtype">void</span> knn_inner_products_by_idx (<span class="keyword">const</span> <span class="keywordtype">float</span> * x,</div>
|
|
<div class="line"><a name="l01052"></a><span class="lineno"> 1052</span>  <span class="keyword">const</span> <span class="keywordtype">float</span> * y,</div>
|
|
<div class="line"><a name="l01053"></a><span class="lineno"> 1053</span>  <span class="keyword">const</span> <span class="keywordtype">long</span> * ids,</div>
|
|
<div class="line"><a name="l01054"></a><span class="lineno"> 1054</span>  <span class="keywordtype">size_t</span> d, <span class="keywordtype">size_t</span> nx, <span class="keywordtype">size_t</span> ny,</div>
|
|
<div class="line"><a name="l01055"></a><span class="lineno"> 1055</span>  float_minheap_array_t * res)</div>
|
|
<div class="line"><a name="l01056"></a><span class="lineno"> 1056</span> {</div>
|
|
<div class="line"><a name="l01057"></a><span class="lineno"> 1057</span>  <span class="keywordtype">size_t</span> k = res->k;</div>
|
|
<div class="line"><a name="l01058"></a><span class="lineno"> 1058</span> </div>
|
|
<div class="line"><a name="l01059"></a><span class="lineno"> 1059</span> <span class="preprocessor">#pragma omp parallel for</span></div>
|
|
<div class="line"><a name="l01060"></a><span class="lineno"> 1060</span> <span class="preprocessor"></span> <span class="keywordflow">for</span> (<span class="keywordtype">size_t</span> i = 0; i < nx; i++) {</div>
|
|
<div class="line"><a name="l01061"></a><span class="lineno"> 1061</span>  <span class="keyword">const</span> <span class="keywordtype">float</span> * x_ = x + i * d;</div>
|
|
<div class="line"><a name="l01062"></a><span class="lineno"> 1062</span>  <span class="keyword">const</span> <span class="keywordtype">long</span> * idsi = ids + i * ny;</div>
|
|
<div class="line"><a name="l01063"></a><span class="lineno"> 1063</span>  <span class="keywordtype">size_t</span> j;</div>
|
|
<div class="line"><a name="l01064"></a><span class="lineno"> 1064</span>  <span class="keywordtype">float</span> * __restrict simi = res->get_val(i);</div>
|
|
<div class="line"><a name="l01065"></a><span class="lineno"> 1065</span>  <span class="keywordtype">long</span> * __restrict idxi = res->get_ids (i);</div>
|
|
<div class="line"><a name="l01066"></a><span class="lineno"> 1066</span>  minheap_heapify (k, simi, idxi);</div>
|
|
<div class="line"><a name="l01067"></a><span class="lineno"> 1067</span> </div>
|
|
<div class="line"><a name="l01068"></a><span class="lineno"> 1068</span>  <span class="keywordflow">for</span> (j = 0; j < ny; j++) {</div>
|
|
<div class="line"><a name="l01069"></a><span class="lineno"> 1069</span>  <span class="keywordflow">if</span> (idsi[j] < 0) <span class="keywordflow">break</span>;</div>
|
|
<div class="line"><a name="l01070"></a><span class="lineno"> 1070</span>  <span class="keywordtype">float</span> ip = fvec_inner_product (x_, y + d * idsi[j], d);</div>
|
|
<div class="line"><a name="l01071"></a><span class="lineno"> 1071</span> </div>
|
|
<div class="line"><a name="l01072"></a><span class="lineno"> 1072</span>  <span class="keywordflow">if</span> (ip > simi[0]) {</div>
|
|
<div class="line"><a name="l01073"></a><span class="lineno"> 1073</span>  minheap_pop (k, simi, idxi);</div>
|
|
<div class="line"><a name="l01074"></a><span class="lineno"> 1074</span>  minheap_push (k, simi, idxi, ip, idsi[j]);</div>
|
|
<div class="line"><a name="l01075"></a><span class="lineno"> 1075</span>  }</div>
|
|
<div class="line"><a name="l01076"></a><span class="lineno"> 1076</span>  }</div>
|
|
<div class="line"><a name="l01077"></a><span class="lineno"> 1077</span>  minheap_reorder (k, simi, idxi);</div>
|
|
<div class="line"><a name="l01078"></a><span class="lineno"> 1078</span>  }</div>
|
|
<div class="line"><a name="l01079"></a><span class="lineno"> 1079</span> </div>
|
|
<div class="line"><a name="l01080"></a><span class="lineno"> 1080</span> }</div>
|
|
<div class="line"><a name="l01081"></a><span class="lineno"> 1081</span> </div>
|
|
<div class="line"><a name="l01082"></a><span class="lineno"> 1082</span> <span class="keywordtype">void</span> knn_L2sqr_by_idx (<span class="keyword">const</span> <span class="keywordtype">float</span> * x,</div>
|
|
<div class="line"><a name="l01083"></a><span class="lineno"> 1083</span>  <span class="keyword">const</span> <span class="keywordtype">float</span> * y,</div>
|
|
<div class="line"><a name="l01084"></a><span class="lineno"> 1084</span>  <span class="keyword">const</span> <span class="keywordtype">long</span> * __restrict ids,</div>
|
|
<div class="line"><a name="l01085"></a><span class="lineno"> 1085</span>  <span class="keywordtype">size_t</span> d, <span class="keywordtype">size_t</span> nx, <span class="keywordtype">size_t</span> ny,</div>
|
|
<div class="line"><a name="l01086"></a><span class="lineno"> 1086</span>  float_maxheap_array_t * res)</div>
|
|
<div class="line"><a name="l01087"></a><span class="lineno"> 1087</span> {</div>
|
|
<div class="line"><a name="l01088"></a><span class="lineno"> 1088</span>  <span class="keywordtype">size_t</span> k = res->k;</div>
|
|
<div class="line"><a name="l01089"></a><span class="lineno"> 1089</span> </div>
|
|
<div class="line"><a name="l01090"></a><span class="lineno"> 1090</span> <span class="preprocessor">#pragma omp parallel for</span></div>
|
|
<div class="line"><a name="l01091"></a><span class="lineno"> 1091</span> <span class="preprocessor"></span> <span class="keywordflow">for</span> (<span class="keywordtype">size_t</span> i = 0; i < nx; i++) {</div>
|
|
<div class="line"><a name="l01092"></a><span class="lineno"> 1092</span>  <span class="keyword">const</span> <span class="keywordtype">float</span> * x_ = x + i * d;</div>
|
|
<div class="line"><a name="l01093"></a><span class="lineno"> 1093</span>  <span class="keyword">const</span> <span class="keywordtype">long</span> * __restrict idsi = ids + i * ny;</div>
|
|
<div class="line"><a name="l01094"></a><span class="lineno"> 1094</span>  <span class="keywordtype">float</span> * __restrict simi = res->get_val(i);</div>
|
|
<div class="line"><a name="l01095"></a><span class="lineno"> 1095</span>  <span class="keywordtype">long</span> * __restrict idxi = res->get_ids (i);</div>
|
|
<div class="line"><a name="l01096"></a><span class="lineno"> 1096</span>  maxheap_heapify (res->k, simi, idxi);</div>
|
|
<div class="line"><a name="l01097"></a><span class="lineno"> 1097</span>  <span class="keywordflow">for</span> (<span class="keywordtype">size_t</span> j = 0; j < ny; j++) {</div>
|
|
<div class="line"><a name="l01098"></a><span class="lineno"> 1098</span>  <span class="keywordtype">float</span> disij = <a class="code" href="namespacefaiss.html#a7466bd32de31640860393a701eaac5ad">fvec_L2sqr</a> (x_, y + d * idsi[j], d);</div>
|
|
<div class="line"><a name="l01099"></a><span class="lineno"> 1099</span> </div>
|
|
<div class="line"><a name="l01100"></a><span class="lineno"> 1100</span>  <span class="keywordflow">if</span> (disij < simi[0]) {</div>
|
|
<div class="line"><a name="l01101"></a><span class="lineno"> 1101</span>  maxheap_pop (k, simi, idxi);</div>
|
|
<div class="line"><a name="l01102"></a><span class="lineno"> 1102</span>  maxheap_push (k, simi, idxi, disij, idsi[j]);</div>
|
|
<div class="line"><a name="l01103"></a><span class="lineno"> 1103</span>  }</div>
|
|
<div class="line"><a name="l01104"></a><span class="lineno"> 1104</span>  }</div>
|
|
<div class="line"><a name="l01105"></a><span class="lineno"> 1105</span>  maxheap_reorder (res->k, simi, idxi);</div>
|
|
<div class="line"><a name="l01106"></a><span class="lineno"> 1106</span>  }</div>
|
|
<div class="line"><a name="l01107"></a><span class="lineno"> 1107</span> </div>
|
|
<div class="line"><a name="l01108"></a><span class="lineno"> 1108</span> }</div>
|
|
<div class="line"><a name="l01109"></a><span class="lineno"> 1109</span> </div>
|
|
<div class="line"><a name="l01110"></a><span class="lineno"> 1110</span> </div>
|
|
<div class="line"><a name="l01111"></a><span class="lineno"> 1111</span> </div>
|
|
<div class="line"><a name="l01112"></a><span class="lineno"> 1112</span> </div>
|
|
<div class="line"><a name="l01113"></a><span class="lineno"> 1113</span> </div>
|
|
<div class="line"><a name="l01114"></a><span class="lineno"> 1114</span> <span class="comment">/***************************************************************************</span></div>
|
|
<div class="line"><a name="l01115"></a><span class="lineno"> 1115</span> <span class="comment"> * Range search</span></div>
|
|
<div class="line"><a name="l01116"></a><span class="lineno"> 1116</span> <span class="comment"> ***************************************************************************/</span></div>
|
|
<div class="line"><a name="l01117"></a><span class="lineno"> 1117</span> <span class="comment"></span></div>
|
|
<div class="line"><a name="l01118"></a><span class="lineno"> 1118</span> <span class="comment">/** For each of the nx queries, find all vectors within radius in a set of ny vectors</span></div>
|
|
<div class="line"><a name="l01119"></a><span class="lineno"> 1119</span> <span class="comment"> * compute_l2 = keep squared L2 distances below radius rather than inner products above radius</span></div>
|
|
<div class="line"><a name="l01120"></a><span class="lineno"> 1120</span> <span class="comment"> */</span></div>
|
|
<div class="line"><a name="l01121"></a><span class="lineno"> 1121</span> <span class="keyword">template</span> <<span class="keywordtype">bool</span> compute_l2></div>
|
|
<div class="line"><a name="l01122"></a><span class="lineno"> 1122</span> <span class="keyword">static</span> <span class="keywordtype">void</span> range_search_blas (</div>
|
|
<div class="line"><a name="l01123"></a><span class="lineno"> 1123</span>  <span class="keyword">const</span> <span class="keywordtype">float</span> * x,</div>
|
|
<div class="line"><a name="l01124"></a><span class="lineno"> 1124</span>  <span class="keyword">const</span> <span class="keywordtype">float</span> * y,</div>
|
|
<div class="line"><a name="l01125"></a><span class="lineno"> 1125</span>  <span class="keywordtype">size_t</span> d, <span class="keywordtype">size_t</span> nx, <span class="keywordtype">size_t</span> ny,</div>
|
|
<div class="line"><a name="l01126"></a><span class="lineno"> 1126</span>  <span class="keywordtype">float</span> radius,</div>
|
|
<div class="line"><a name="l01127"></a><span class="lineno"> 1127</span>  RangeSearchResult *result)</div>
|
|
<div class="line"><a name="l01128"></a><span class="lineno"> 1128</span> {</div>
|
|
<div class="line"><a name="l01129"></a><span class="lineno"> 1129</span> </div>
|
|
<div class="line"><a name="l01130"></a><span class="lineno"> 1130</span>  <span class="comment">// BLAS does not like empty matrices</span></div>
|
|
<div class="line"><a name="l01131"></a><span class="lineno"> 1131</span>  <span class="keywordflow">if</span> (nx == 0 || ny == 0) <span class="keywordflow">return</span>;</div>
|
|
<div class="line"><a name="l01132"></a><span class="lineno"> 1132</span> </div>
|
|
<div class="line"><a name="l01133"></a><span class="lineno"> 1133</span>  <span class="comment">/* block sizes */</span></div>
|
|
<div class="line"><a name="l01134"></a><span class="lineno"> 1134</span>  <span class="keyword">const</span> <span class="keywordtype">size_t</span> bs_x = 4096, bs_y = 1024;</div>
|
|
<div class="line"><a name="l01135"></a><span class="lineno"> 1135</span>  <span class="comment">// const size_t bs_x = 16, bs_y = 16;</span></div>
|
|
<div class="line"><a name="l01136"></a><span class="lineno"> 1136</span>  <span class="keywordtype">float</span> *ip_block = <span class="keyword">new</span> <span class="keywordtype">float</span>[bs_x * bs_y];</div>
|
|
<div class="line"><a name="l01137"></a><span class="lineno"> 1137</span> </div>
|
|
<div class="line"><a name="l01138"></a><span class="lineno"> 1138</span>  <span class="keywordtype">float</span> *x_norms = <span class="keyword">nullptr</span>, *y_norms = <span class="keyword">nullptr</span>;</div>
|
|
<div class="line"><a name="l01139"></a><span class="lineno"> 1139</span> </div>
|
|
<div class="line"><a name="l01140"></a><span class="lineno"> 1140</span>  <span class="keywordflow">if</span> (compute_l2) {</div>
|
|
<div class="line"><a name="l01141"></a><span class="lineno"> 1141</span>  x_norms = <span class="keyword">new</span> <span class="keywordtype">float</span>[nx];</div>
|
|
<div class="line"><a name="l01142"></a><span class="lineno"> 1142</span>  fvec_norms_L2sqr (x_norms, x, d, nx);</div>
|
|
<div class="line"><a name="l01143"></a><span class="lineno"> 1143</span>  y_norms = <span class="keyword">new</span> <span class="keywordtype">float</span>[ny];</div>
|
|
<div class="line"><a name="l01144"></a><span class="lineno"> 1144</span>  fvec_norms_L2sqr (y_norms, y, d, ny);</div>
|
|
<div class="line"><a name="l01145"></a><span class="lineno"> 1145</span>  }</div>
|
|
<div class="line"><a name="l01146"></a><span class="lineno"> 1146</span> </div>
|
|
<div class="line"><a name="l01147"></a><span class="lineno"> 1147</span>  std::vector <RangeSearchPartialResult *> partial_results;</div>
|
|
<div class="line"><a name="l01148"></a><span class="lineno"> 1148</span> </div>
|
|
<div class="line"><a name="l01149"></a><span class="lineno"> 1149</span>  <span class="keywordflow">for</span> (<span class="keywordtype">size_t</span> j0 = 0; j0 < ny; j0 += bs_y) {</div>
|
|
<div class="line"><a name="l01150"></a><span class="lineno"> 1150</span>  <span class="keywordtype">size_t</span> j1 = j0 + bs_y;</div>
|
|
<div class="line"><a name="l01151"></a><span class="lineno"> 1151</span>  <span class="keywordflow">if</span> (j1 > ny) j1 = ny;</div>
|
|
<div class="line"><a name="l01152"></a><span class="lineno"> 1152</span>  RangeSearchPartialResult * pres = <span class="keyword">new</span> RangeSearchPartialResult (result);</div>
|
|
<div class="line"><a name="l01153"></a><span class="lineno"> 1153</span>  partial_results.push_back (pres);</div>
|
|
<div class="line"><a name="l01154"></a><span class="lineno"> 1154</span> </div>
|
|
<div class="line"><a name="l01155"></a><span class="lineno"> 1155</span>  <span class="keywordflow">for</span> (<span class="keywordtype">size_t</span> i0 = 0; i0 < nx; i0 += bs_x) {</div>
|
|
<div class="line"><a name="l01156"></a><span class="lineno"> 1156</span>  <span class="keywordtype">size_t</span> i1 = i0 + bs_x;</div>
|
|
<div class="line"><a name="l01157"></a><span class="lineno"> 1157</span>  <span class="keywordflow">if</span>(i1 > nx) i1 = nx;</div>
|
|
<div class="line"><a name="l01158"></a><span class="lineno"> 1158</span> </div>
|
|
<div class="line"><a name="l01159"></a><span class="lineno"> 1159</span>  <span class="comment">/* compute the actual dot products */</span></div>
|
|
<div class="line"><a name="l01160"></a><span class="lineno"> 1160</span>  {</div>
|
|
<div class="line"><a name="l01161"></a><span class="lineno"> 1161</span>  <span class="keywordtype">float</span> one = 1, zero = 0;</div>
|
|
<div class="line"><a name="l01162"></a><span class="lineno"> 1162</span>  FINTEGER nyi = j1 - j0, nxi = i1 - i0, di = d;</div>
|
|
<div class="line"><a name="l01163"></a><span class="lineno"> 1163</span>  sgemm_ (<span class="stringliteral">"Transpose"</span>, <span class="stringliteral">"Not transpose"</span>, &nyi, &nxi, &di, &one,</div>
|
|
<div class="line"><a name="l01164"></a><span class="lineno"> 1164</span>  y + j0 * d, &di,</div>
|
|
<div class="line"><a name="l01165"></a><span class="lineno"> 1165</span>  x + i0 * d, &di, &zero,</div>
|
|
<div class="line"><a name="l01166"></a><span class="lineno"> 1166</span>  ip_block, &nyi);</div>
|
|
<div class="line"><a name="l01167"></a><span class="lineno"> 1167</span>  }</div>
|
|
<div class="line"><a name="l01168"></a><span class="lineno"> 1168</span> </div>
|
|
<div class="line"><a name="l01169"></a><span class="lineno"> 1169</span> </div>
|
|
<div class="line"><a name="l01170"></a><span class="lineno"> 1170</span>  <span class="keywordflow">for</span> (<span class="keywordtype">size_t</span> i = i0; i < i1; i++) {</div>
|
|
<div class="line"><a name="l01171"></a><span class="lineno"> 1171</span>  <span class="keyword">const</span> <span class="keywordtype">float</span> *ip_line = ip_block + (i - i0) * (j1 - j0);</div>
|
|
<div class="line"><a name="l01172"></a><span class="lineno"> 1172</span> </div>
|
|
<div class="line"><a name="l01173"></a><span class="lineno"> 1173</span>  RangeSearchPartialResult::QueryResult & qres =</div>
|
|
<div class="line"><a name="l01174"></a><span class="lineno"> 1174</span>  pres->new_result (i);</div>
|
|
<div class="line"><a name="l01175"></a><span class="lineno"> 1175</span> </div>
|
|
<div class="line"><a name="l01176"></a><span class="lineno"> 1176</span>  <span class="keywordflow">for</span> (<span class="keywordtype">size_t</span> j = j0; j < j1; j++) {</div>
|
|
<div class="line"><a name="l01177"></a><span class="lineno"> 1177</span>  <span class="keywordtype">float</span> ip = *ip_line++;</div>
|
|
<div class="line"><a name="l01178"></a><span class="lineno"> 1178</span>  <span class="keywordflow">if</span> (compute_l2) {</div>
|
|
<div class="line"><a name="l01179"></a><span class="lineno"> 1179</span>  <span class="keywordtype">float</span> dis = x_norms[i] + y_norms[j] - 2 * ip;</div>
|
|
<div class="line"><a name="l01180"></a><span class="lineno"> 1180</span>  <span class="keywordflow">if</span> (dis < radius) {</div>
|
|
<div class="line"><a name="l01181"></a><span class="lineno"> 1181</span>  qres.add (dis, j);</div>
|
|
<div class="line"><a name="l01182"></a><span class="lineno"> 1182</span>  }</div>
|
|
<div class="line"><a name="l01183"></a><span class="lineno"> 1183</span>  } <span class="keywordflow">else</span> {</div>
|
|
<div class="line"><a name="l01184"></a><span class="lineno"> 1184</span>  <span class="keywordflow">if</span> (ip > radius) {</div>
|
|
<div class="line"><a name="l01185"></a><span class="lineno"> 1185</span>  qres.add (ip, j);</div>
|
|
<div class="line"><a name="l01186"></a><span class="lineno"> 1186</span>  }</div>
|
|
<div class="line"><a name="l01187"></a><span class="lineno"> 1187</span>  }</div>
|
|
<div class="line"><a name="l01188"></a><span class="lineno"> 1188</span>  }</div>
|
|
<div class="line"><a name="l01189"></a><span class="lineno"> 1189</span>  }</div>
|
|
<div class="line"><a name="l01190"></a><span class="lineno"> 1190</span>  }</div>
|
|
<div class="line"><a name="l01191"></a><span class="lineno"> 1191</span> </div>
|
|
<div class="line"><a name="l01192"></a><span class="lineno"> 1192</span>  }</div>
|
|
<div class="line"><a name="l01193"></a><span class="lineno"> 1193</span>  <span class="keyword">delete</span> [] ip_block;</div>
|
|
<div class="line"><a name="l01194"></a><span class="lineno"> 1194</span>  <span class="keyword">delete</span> [] x_norms;</div>
|
|
<div class="line"><a name="l01195"></a><span class="lineno"> 1195</span>  <span class="keyword">delete</span> [] y_norms;</div>
|
|
<div class="line"><a name="l01196"></a><span class="lineno"> 1196</span> </div>
|
|
<div class="line"><a name="l01197"></a><span class="lineno"> 1197</span>  { <span class="comment">// merge the partial results</span></div>
|
|
<div class="line"><a name="l01198"></a><span class="lineno"> 1198</span>  <span class="keywordtype">int</span> npres = partial_results.size();</div>
|
|
<div class="line"><a name="l01199"></a><span class="lineno"> 1199</span>  <span class="comment">// count</span></div>
|
|
<div class="line"><a name="l01200"></a><span class="lineno"> 1200</span>  <span class="keywordflow">for</span> (<span class="keywordtype">size_t</span> i = 0; i < nx; i++) {</div>
|
|
<div class="line"><a name="l01201"></a><span class="lineno"> 1201</span>  <span class="keywordflow">for</span> (<span class="keywordtype">int</span> j = 0; j < npres; j++)</div>
|
|
<div class="line"><a name="l01202"></a><span class="lineno"> 1202</span>  result->lims[i] += partial_results[j]->queries[i].nres;</div>
|
|
<div class="line"><a name="l01203"></a><span class="lineno"> 1203</span>  }</div>
|
|
<div class="line"><a name="l01204"></a><span class="lineno"> 1204</span>  result->do_allocation ();</div>
|
|
<div class="line"><a name="l01205"></a><span class="lineno"> 1205</span>  <span class="keywordflow">for</span> (<span class="keywordtype">int</span> j = 0; j < npres; j++) {</div>
|
|
<div class="line"><a name="l01206"></a><span class="lineno"> 1206</span>  partial_results[j]->set_result (<span class="keyword">true</span>);</div>
|
|
<div class="line"><a name="l01207"></a><span class="lineno"> 1207</span>  <span class="keyword">delete</span> partial_results[j];</div>
|
|
<div class="line"><a name="l01208"></a><span class="lineno"> 1208</span>  }</div>
|
|
<div class="line"><a name="l01209"></a><span class="lineno"> 1209</span> </div>
|
|
<div class="line"><a name="l01210"></a><span class="lineno"> 1210</span>  <span class="comment">// reset the limits</span></div>
|
|
<div class="line"><a name="l01211"></a><span class="lineno"> 1211</span>  <span class="keywordflow">for</span> (<span class="keywordtype">size_t</span> i = nx; i > 0; i--) {</div>
|
|
<div class="line"><a name="l01212"></a><span class="lineno"> 1212</span>  result->lims [i] = result->lims [i - 1];</div>
|
|
<div class="line"><a name="l01213"></a><span class="lineno"> 1213</span>  }</div>
|
|
<div class="line"><a name="l01214"></a><span class="lineno"> 1214</span>  result->lims [0] = 0;</div>
|
|
<div class="line"><a name="l01215"></a><span class="lineno"> 1215</span>  }</div>
|
|
<div class="line"><a name="l01216"></a><span class="lineno"> 1216</span> }</div>
|
|
<div class="line"><a name="l01217"></a><span class="lineno"> 1217</span> </div>
|
|
<div class="line"><a name="l01218"></a><span class="lineno"> 1218</span> </div>
|
|
<div class="line"><a name="l01219"></a><span class="lineno"> 1219</span> <span class="keyword">template</span> <<span class="keywordtype">bool</span> compute_l2></div>
|
|
<div class="line"><a name="l01220"></a><span class="lineno"> 1220</span> <span class="keyword">static</span> <span class="keywordtype">void</span> range_search_sse (<span class="keyword">const</span> <span class="keywordtype">float</span> * x,</div>
|
|
<div class="line"><a name="l01221"></a><span class="lineno"> 1221</span>  <span class="keyword">const</span> <span class="keywordtype">float</span> * y,</div>
|
|
<div class="line"><a name="l01222"></a><span class="lineno"> 1222</span>  <span class="keywordtype">size_t</span> d, <span class="keywordtype">size_t</span> nx, <span class="keywordtype">size_t</span> ny,</div>
|
|
<div class="line"><a name="l01223"></a><span class="lineno"> 1223</span>  <span class="keywordtype">float</span> radius,</div>
|
|
<div class="line"><a name="l01224"></a><span class="lineno"> 1224</span>  RangeSearchResult *res)</div>
|
|
<div class="line"><a name="l01225"></a><span class="lineno"> 1225</span> {</div>
|
|
<div class="line"><a name="l01226"></a><span class="lineno"> 1226</span>  FAISS_THROW_IF_NOT (d % 4 == 0);</div>
|
|
<div class="line"><a name="l01227"></a><span class="lineno"> 1227</span> </div>
|
|
<div class="line"><a name="l01228"></a><span class="lineno"> 1228</span> <span class="preprocessor">#pragma omp parallel</span></div>
|
|
<div class="line"><a name="l01229"></a><span class="lineno"> 1229</span> <span class="preprocessor"></span> {</div>
|
|
<div class="line"><a name="l01230"></a><span class="lineno"> 1230</span>  RangeSearchPartialResult pres (res);</div>
|
|
<div class="line"><a name="l01231"></a><span class="lineno"> 1231</span> </div>
|
|
<div class="line"><a name="l01232"></a><span class="lineno"> 1232</span> <span class="preprocessor">#pragma omp for</span></div>
|
|
<div class="line"><a name="l01233"></a><span class="lineno"> 1233</span> <span class="preprocessor"></span> <span class="keywordflow">for</span> (<span class="keywordtype">size_t</span> i = 0; i < nx; i++) {</div>
|
|
<div class="line"><a name="l01234"></a><span class="lineno"> 1234</span>  <span class="keyword">const</span> <span class="keywordtype">float</span> * x_ = x + i * d;</div>
|
|
<div class="line"><a name="l01235"></a><span class="lineno"> 1235</span>  <span class="keyword">const</span> <span class="keywordtype">float</span> * y_ = y;</div>
|
|
<div class="line"><a name="l01236"></a><span class="lineno"> 1236</span>  <span class="keywordtype">size_t</span> j;</div>
|
|
<div class="line"><a name="l01237"></a><span class="lineno"> 1237</span> </div>
|
|
<div class="line"><a name="l01238"></a><span class="lineno"> 1238</span>  RangeSearchPartialResult::QueryResult & qres =</div>
|
|
<div class="line"><a name="l01239"></a><span class="lineno"> 1239</span>  pres.new_result (i);</div>
|
|
<div class="line"><a name="l01240"></a><span class="lineno"> 1240</span> </div>
|
|
<div class="line"><a name="l01241"></a><span class="lineno"> 1241</span>  <span class="keywordflow">for</span> (j = 0; j < ny; j++) {</div>
|
|
<div class="line"><a name="l01242"></a><span class="lineno"> 1242</span>  <span class="keywordflow">if</span> (compute_l2) {</div>
|
|
<div class="line"><a name="l01243"></a><span class="lineno"> 1243</span>  <span class="keywordtype">float</span> disij = <a class="code" href="namespacefaiss.html#a7466bd32de31640860393a701eaac5ad">fvec_L2sqr</a> (x_, y_, d);</div>
|
|
<div class="line"><a name="l01244"></a><span class="lineno"> 1244</span>  <span class="keywordflow">if</span> (disij < radius) {</div>
|
|
<div class="line"><a name="l01245"></a><span class="lineno"> 1245</span>  qres.add (disij, j);</div>
|
|
<div class="line"><a name="l01246"></a><span class="lineno"> 1246</span>  }</div>
|
|
<div class="line"><a name="l01247"></a><span class="lineno"> 1247</span>  } <span class="keywordflow">else</span> {</div>
|
|
<div class="line"><a name="l01248"></a><span class="lineno"> 1248</span>  <span class="keywordtype">float</span> ip = fvec_inner_product (x_, y_, d);</div>
|
|
<div class="line"><a name="l01249"></a><span class="lineno"> 1249</span>  <span class="keywordflow">if</span> (ip > radius) {</div>
|
|
<div class="line"><a name="l01250"></a><span class="lineno"> 1250</span>  qres.add (ip, j);</div>
|
|
<div class="line"><a name="l01251"></a><span class="lineno"> 1251</span>  }</div>
|
|
<div class="line"><a name="l01252"></a><span class="lineno"> 1252</span>  }</div>
|
|
<div class="line"><a name="l01253"></a><span class="lineno"> 1253</span>  y_ += d;</div>
|
|
<div class="line"><a name="l01254"></a><span class="lineno"> 1254</span>  }</div>
|
|
<div class="line"><a name="l01255"></a><span class="lineno"> 1255</span> </div>
|
|
<div class="line"><a name="l01256"></a><span class="lineno"> 1256</span>  }</div>
|
|
<div class="line"><a name="l01257"></a><span class="lineno"> 1257</span>  pres.finalize ();</div>
|
|
<div class="line"><a name="l01258"></a><span class="lineno"> 1258</span>  }</div>
|
|
<div class="line"><a name="l01259"></a><span class="lineno"> 1259</span> }</div>
|
|
<div class="line"><a name="l01260"></a><span class="lineno"> 1260</span> </div>
|
|
<div class="line"><a name="l01261"></a><span class="lineno"> 1261</span> </div>
|
|
<div class="line"><a name="l01262"></a><span class="lineno"> 1262</span> </div>
|
|
<div class="line"><a name="l01263"></a><span class="lineno"> 1263</span> </div>
|
|
<div class="line"><a name="l01264"></a><span class="lineno"> 1264</span> </div>
|
|
<div class="line"><a name="l01265"></a><span class="lineno"><a class="line" href="namespacefaiss.html#a1faa7bd079c9b1addf3058ddf882a000"> 1265</a></span> <span class="keywordtype">void</span> <a class="code" href="namespacefaiss.html#a1faa7bd079c9b1addf3058ddf882a000">range_search_L2sqr</a> (</div>
|
|
<div class="line"><a name="l01266"></a><span class="lineno"> 1266</span>  <span class="keyword">const</span> <span class="keywordtype">float</span> * x,</div>
|
|
<div class="line"><a name="l01267"></a><span class="lineno"> 1267</span>  <span class="keyword">const</span> <span class="keywordtype">float</span> * y,</div>
|
|
<div class="line"><a name="l01268"></a><span class="lineno"> 1268</span>  <span class="keywordtype">size_t</span> d, <span class="keywordtype">size_t</span> nx, <span class="keywordtype">size_t</span> ny,</div>
|
|
<div class="line"><a name="l01269"></a><span class="lineno"> 1269</span>  <span class="keywordtype">float</span> radius,</div>
|
|
<div class="line"><a name="l01270"></a><span class="lineno"> 1270</span>  <a class="code" href="structfaiss_1_1RangeSearchResult.html">RangeSearchResult</a> *res)</div>
|
|
<div class="line"><a name="l01271"></a><span class="lineno"> 1271</span> {</div>
|
|
<div class="line"><a name="l01272"></a><span class="lineno"> 1272</span> </div>
|
|
<div class="line"><a name="l01273"></a><span class="lineno"> 1273</span>  <span class="keywordflow">if</span> (d % 4 == 0 && nx < 20) {</div>
|
|
<div class="line"><a name="l01274"></a><span class="lineno"> 1274</span>  range_search_sse<true> (x, y, d, nx, ny, radius, res);</div>
|
|
<div class="line"><a name="l01275"></a><span class="lineno"> 1275</span>  } <span class="keywordflow">else</span> {</div>
|
|
<div class="line"><a name="l01276"></a><span class="lineno"> 1276</span>  range_search_blas<true> (x, y, d, nx, ny, radius, res);</div>
|
|
<div class="line"><a name="l01277"></a><span class="lineno"> 1277</span>  }</div>
|
|
<div class="line"><a name="l01278"></a><span class="lineno"> 1278</span> }</div>
|
|
<div class="line"><a name="l01279"></a><span class="lineno"> 1279</span> </div>
|
|
<div class="line"><a name="l01280"></a><span class="lineno"><a class="line" href="namespacefaiss.html#ab29d725b808df6f142b80f21aa45e507"> 1280</a></span> <span class="keywordtype">void</span> <a class="code" href="namespacefaiss.html#ab29d725b808df6f142b80f21aa45e507">range_search_inner_product</a> (</div>
|
|
<div class="line"><a name="l01281"></a><span class="lineno"> 1281</span>  <span class="keyword">const</span> <span class="keywordtype">float</span> * x,</div>
|
|
<div class="line"><a name="l01282"></a><span class="lineno"> 1282</span>  <span class="keyword">const</span> <span class="keywordtype">float</span> * y,</div>
|
|
<div class="line"><a name="l01283"></a><span class="lineno"> 1283</span>  <span class="keywordtype">size_t</span> d, <span class="keywordtype">size_t</span> nx, <span class="keywordtype">size_t</span> ny,</div>
|
|
<div class="line"><a name="l01284"></a><span class="lineno"> 1284</span>  <span class="keywordtype">float</span> radius,</div>
|
|
<div class="line"><a name="l01285"></a><span class="lineno"> 1285</span>  <a class="code" href="structfaiss_1_1RangeSearchResult.html">RangeSearchResult</a> *res)</div>
|
|
<div class="line"><a name="l01286"></a><span class="lineno"> 1286</span> {</div>
|
|
<div class="line"><a name="l01287"></a><span class="lineno"> 1287</span> </div>
|
|
<div class="line"><a name="l01288"></a><span class="lineno"> 1288</span>  <span class="keywordflow">if</span> (d % 4 == 0 && nx < 20) {</div>
|
|
<div class="line"><a name="l01289"></a><span class="lineno"> 1289</span>  range_search_sse<false> (x, y, d, nx, ny, radius, res);</div>
|
|
<div class="line"><a name="l01290"></a><span class="lineno"> 1290</span>  } <span class="keywordflow">else</span> {</div>
|
|
<div class="line"><a name="l01291"></a><span class="lineno"> 1291</span>  range_search_blas<false> (x, y, d, nx, ny, radius, res);</div>
|
|
<div class="line"><a name="l01292"></a><span class="lineno"> 1292</span>  }</div>
|
|
<div class="line"><a name="l01293"></a><span class="lineno"> 1293</span> }</div>
|
|
<div class="line"><a name="l01294"></a><span class="lineno"> 1294</span> </div>
|
|
<div class="line"><a name="l01295"></a><span class="lineno"> 1295</span> </div>
|
|
<div class="line"><a name="l01296"></a><span class="lineno"> 1296</span> </div>
|
|
<div class="line"><a name="l01297"></a><span class="lineno"> 1297</span> <span class="comment">/***************************************************************************</span></div>
|
|
<div class="line"><a name="l01298"></a><span class="lineno"> 1298</span> <span class="comment"> * Some matrix manipulation functions</span></div>
|
|
<div class="line"><a name="l01299"></a><span class="lineno"> 1299</span> <span class="comment"> ***************************************************************************/</span></div>
|
|
<div class="line"><a name="l01300"></a><span class="lineno"> 1300</span> </div>
|
|
<div class="line"><a name="l01301"></a><span class="lineno"> 1301</span> </div>
|
|
<div class="line"><a name="l01302"></a><span class="lineno"> 1302</span> <span class="comment">/* This function exists because the Torch counterpart is extremely slow</span></div>
|
|
<div class="line"><a name="l01303"></a><span class="lineno"> 1303</span> <span class="comment"> (not multi-threaded + unexpected overhead even in single thread).</span></div>
|
|
<div class="line"><a name="l01304"></a><span class="lineno"> 1304</span> <span class="comment"> It is here to implement the usual property |x-y|^2=|x|^2+|y|^2-2<x|y> */</span></div>
|
|
<div class="line"><a name="l01305"></a><span class="lineno"> 1305</span> <span class="keywordtype">void</span> inner_product_to_L2sqr (<span class="keywordtype">float</span> * __restrict dis,</div>
|
|
<div class="line"><a name="l01306"></a><span class="lineno"> 1306</span>  <span class="keyword">const</span> <span class="keywordtype">float</span> * nr1,</div>
|
|
<div class="line"><a name="l01307"></a><span class="lineno"> 1307</span>  <span class="keyword">const</span> <span class="keywordtype">float</span> * nr2,</div>
|
|
<div class="line"><a name="l01308"></a><span class="lineno"> 1308</span>  <span class="keywordtype">size_t</span> n1, <span class="keywordtype">size_t</span> n2)</div>
|
|
<div class="line"><a name="l01309"></a><span class="lineno"> 1309</span> {</div>
|
|
<div class="line"><a name="l01310"></a><span class="lineno"> 1310</span> </div>
|
|
<div class="line"><a name="l01311"></a><span class="lineno"> 1311</span> <span class="preprocessor">#pragma omp parallel for</span></div>
|
|
<div class="line"><a name="l01312"></a><span class="lineno"> 1312</span> <span class="preprocessor"></span> <span class="keywordflow">for</span> (<span class="keywordtype">size_t</span> j = 0 ; j < n1 ; j++) {</div>
|
|
<div class="line"><a name="l01313"></a><span class="lineno"> 1313</span>  <span class="keywordtype">float</span> * disj = dis + j * n2;</div>
|
|
<div class="line"><a name="l01314"></a><span class="lineno"> 1314</span>  <span class="keywordflow">for</span> (<span class="keywordtype">size_t</span> i = 0 ; i < n2 ; i++)</div>
|
|
<div class="line"><a name="l01315"></a><span class="lineno"> 1315</span>  disj[i] = nr1[j] + nr2[i] - 2 * disj[i];</div>
|
|
<div class="line"><a name="l01316"></a><span class="lineno"> 1316</span>  }</div>
|
|
<div class="line"><a name="l01317"></a><span class="lineno"> 1317</span> }</div>
|
|
<div class="line"><a name="l01318"></a><span class="lineno"> 1318</span> </div>
|
|
<div class="line"><a name="l01319"></a><span class="lineno"> 1319</span> </div>
|
|
<div class="line"><a name="l01320"></a><span class="lineno"><a class="line" href="namespacefaiss.html#afb68fe89ad5e948974da1b70d7b4157c"> 1320</a></span> <span class="keywordtype">void</span> <a class="code" href="namespacefaiss.html#afb68fe89ad5e948974da1b70d7b4157c">matrix_qr</a> (<span class="keywordtype">int</span> m, <span class="keywordtype">int</span> n, <span class="keywordtype">float</span> *a)</div>
|
|
<div class="line"><a name="l01321"></a><span class="lineno"> 1321</span> {</div>
|
|
<div class="line"><a name="l01322"></a><span class="lineno"> 1322</span>  FAISS_THROW_IF_NOT (m >= n);</div>
|
|
<div class="line"><a name="l01323"></a><span class="lineno"> 1323</span>  FINTEGER mi = m, ni = n, ki = mi < ni ? mi : ni;</div>
|
|
<div class="line"><a name="l01324"></a><span class="lineno"> 1324</span>  std::vector<float> tau (ki);</div>
|
|
<div class="line"><a name="l01325"></a><span class="lineno"> 1325</span>  FINTEGER lwork = -1, info;</div>
|
|
<div class="line"><a name="l01326"></a><span class="lineno"> 1326</span>  <span class="keywordtype">float</span> work_size;</div>
|
|
<div class="line"><a name="l01327"></a><span class="lineno"> 1327</span> </div>
|
|
<div class="line"><a name="l01328"></a><span class="lineno"> 1328</span>  sgeqrf_ (&mi, &ni, a, &mi, tau.data(),</div>
|
|
<div class="line"><a name="l01329"></a><span class="lineno"> 1329</span>  &work_size, &lwork, &info);</div>
|
|
<div class="line"><a name="l01330"></a><span class="lineno"> 1330</span>  lwork = size_t(work_size);</div>
|
|
<div class="line"><a name="l01331"></a><span class="lineno"> 1331</span>  std::vector<float> work (lwork);</div>
|
|
<div class="line"><a name="l01332"></a><span class="lineno"> 1332</span> </div>
|
|
<div class="line"><a name="l01333"></a><span class="lineno"> 1333</span>  sgeqrf_ (&mi, &ni, a, &mi,</div>
|
|
<div class="line"><a name="l01334"></a><span class="lineno"> 1334</span>  tau.data(), work.data(), &lwork, &info);</div>
|
|
<div class="line"><a name="l01335"></a><span class="lineno"> 1335</span> </div>
|
|
<div class="line"><a name="l01336"></a><span class="lineno"> 1336</span>  sorgqr_ (&mi, &ni, &ki, a, &mi, tau.data(),</div>
|
|
<div class="line"><a name="l01337"></a><span class="lineno"> 1337</span>  work.data(), &lwork, &info);</div>
|
|
<div class="line"><a name="l01338"></a><span class="lineno"> 1338</span> </div>
|
|
<div class="line"><a name="l01339"></a><span class="lineno"> 1339</span> }</div>
|
|
<div class="line"><a name="l01340"></a><span class="lineno"> 1340</span> </div>
|
|
<div class="line"><a name="l01341"></a><span class="lineno"> 1341</span> </div>
|
|
<div class="line"><a name="l01342"></a><span class="lineno"><a class="line" href="namespacefaiss.html#a3d9c7db82d43c1f0ab1d28b92bc9fe57"> 1342</a></span> <span class="keywordtype">void</span> <a class="code" href="namespacefaiss.html#a3d9c7db82d43c1f0ab1d28b92bc9fe57">pairwise_L2sqr</a> (<span class="keywordtype">long</span> d,</div>
|
|
<div class="line"><a name="l01343"></a><span class="lineno"> 1343</span>  <span class="keywordtype">long</span> nq, <span class="keyword">const</span> <span class="keywordtype">float</span> *xq,</div>
|
|
<div class="line"><a name="l01344"></a><span class="lineno"> 1344</span>  <span class="keywordtype">long</span> nb, <span class="keyword">const</span> <span class="keywordtype">float</span> *xb,</div>
|
|
<div class="line"><a name="l01345"></a><span class="lineno"> 1345</span>  <span class="keywordtype">float</span> *dis,</div>
|
|
<div class="line"><a name="l01346"></a><span class="lineno"> 1346</span>  <span class="keywordtype">long</span> ldq, <span class="keywordtype">long</span> ldb, <span class="keywordtype">long</span> ldd)</div>
|
|
<div class="line"><a name="l01347"></a><span class="lineno"> 1347</span> {</div>
|
|
<div class="line"><a name="l01348"></a><span class="lineno"> 1348</span>  <span class="keywordflow">if</span> (nq == 0 || nb == 0) <span class="keywordflow">return</span>;</div>
|
|
<div class="line"><a name="l01349"></a><span class="lineno"> 1349</span>  <span class="keywordflow">if</span> (ldq == -1) ldq = d;</div>
|
|
<div class="line"><a name="l01350"></a><span class="lineno"> 1350</span>  <span class="keywordflow">if</span> (ldb == -1) ldb = d;</div>
|
|
<div class="line"><a name="l01351"></a><span class="lineno"> 1351</span>  <span class="keywordflow">if</span> (ldd == -1) ldd = nb;</div>
|
|
<div class="line"><a name="l01352"></a><span class="lineno"> 1352</span> </div>
|
|
<div class="line"><a name="l01353"></a><span class="lineno"> 1353</span>  <span class="comment">// store in beginning of distance matrix to avoid malloc</span></div>
|
|
<div class="line"><a name="l01354"></a><span class="lineno"> 1354</span>  <span class="keywordtype">float</span> *b_norms = dis;</div>
|
|
<div class="line"><a name="l01355"></a><span class="lineno"> 1355</span> </div>
|
|
<div class="line"><a name="l01356"></a><span class="lineno"> 1356</span> <span class="preprocessor">#pragma omp parallel for</span></div>
|
|
<div class="line"><a name="l01357"></a><span class="lineno"> 1357</span> <span class="preprocessor"></span> <span class="keywordflow">for</span> (<span class="keywordtype">long</span> i = 0; i < nb; i++)</div>
|
|
<div class="line"><a name="l01358"></a><span class="lineno"> 1358</span>  b_norms [i] = <a class="code" href="namespacefaiss.html#a7a49180ebf10e643217bbce5862c7f84">fvec_norm_L2sqr</a> (xb + i * ldb, d);</div>
|
|
<div class="line"><a name="l01359"></a><span class="lineno"> 1359</span> </div>
|
|
<div class="line"><a name="l01360"></a><span class="lineno"> 1360</span> <span class="preprocessor">#pragma omp parallel for</span></div>
|
|
<div class="line"><a name="l01361"></a><span class="lineno"> 1361</span> <span class="preprocessor"></span> <span class="keywordflow">for</span> (<span class="keywordtype">long</span> i = 1; i < nq; i++) {</div>
|
|
<div class="line"><a name="l01362"></a><span class="lineno"> 1362</span>  <span class="keywordtype">float</span> q_norm = <a class="code" href="namespacefaiss.html#a7a49180ebf10e643217bbce5862c7f84">fvec_norm_L2sqr</a> (xq + i * ldq, d);</div>
|
|
<div class="line"><a name="l01363"></a><span class="lineno"> 1363</span>  <span class="keywordflow">for</span> (<span class="keywordtype">long</span> j = 0; j < nb; j++)</div>
|
|
<div class="line"><a name="l01364"></a><span class="lineno"> 1364</span>  dis[i * ldd + j] = q_norm + b_norms [j];</div>
|
|
<div class="line"><a name="l01365"></a><span class="lineno"> 1365</span>  }</div>
|
|
<div class="line"><a name="l01366"></a><span class="lineno"> 1366</span> </div>
|
|
<div class="line"><a name="l01367"></a><span class="lineno"> 1367</span>  {</div>
|
|
<div class="line"><a name="l01368"></a><span class="lineno"> 1368</span>  <span class="keywordtype">float</span> q_norm = <a class="code" href="namespacefaiss.html#a7a49180ebf10e643217bbce5862c7f84">fvec_norm_L2sqr</a> (xq, d);</div>
|
|
<div class="line"><a name="l01369"></a><span class="lineno"> 1369</span>  <span class="keywordflow">for</span> (<span class="keywordtype">long</span> j = 0; j < nb; j++)</div>
|
|
<div class="line"><a name="l01370"></a><span class="lineno"> 1370</span>  dis[j] += q_norm;</div>
|
|
<div class="line"><a name="l01371"></a><span class="lineno"> 1371</span>  }</div>
|
|
<div class="line"><a name="l01372"></a><span class="lineno"> 1372</span> </div>
|
|
<div class="line"><a name="l01373"></a><span class="lineno"> 1373</span>  {</div>
|
|
<div class="line"><a name="l01374"></a><span class="lineno"> 1374</span>  FINTEGER nbi = nb, nqi = nq, di = d, ldqi = ldq, ldbi = ldb, lddi = ldd;</div>
|
|
<div class="line"><a name="l01375"></a><span class="lineno"> 1375</span>  <span class="keywordtype">float</span> one = 1.0, minus_2 = -2.0;</div>
|
|
<div class="line"><a name="l01376"></a><span class="lineno"> 1376</span> </div>
|
|
<div class="line"><a name="l01377"></a><span class="lineno"> 1377</span>  sgemm_ (<span class="stringliteral">"Transposed"</span>, <span class="stringliteral">"Not transposed"</span>,</div>
|
|
<div class="line"><a name="l01378"></a><span class="lineno"> 1378</span>  &nbi, &nqi, &di,</div>
|
|
<div class="line"><a name="l01379"></a><span class="lineno"> 1379</span>  &minus_2,</div>
|
|
<div class="line"><a name="l01380"></a><span class="lineno"> 1380</span>  xb, &ldbi,</div>
|
|
<div class="line"><a name="l01381"></a><span class="lineno"> 1381</span>  xq, &ldqi,</div>
|
|
<div class="line"><a name="l01382"></a><span class="lineno"> 1382</span>  &one, dis, &lddi);</div>
|
|
<div class="line"><a name="l01383"></a><span class="lineno"> 1383</span>  }</div>
|
|
<div class="line"><a name="l01384"></a><span class="lineno"> 1384</span> </div>
|
|
<div class="line"><a name="l01385"></a><span class="lineno"> 1385</span> </div>
|
|
<div class="line"><a name="l01386"></a><span class="lineno"> 1386</span> }</div>
|
|
<div class="line"><a name="l01387"></a><span class="lineno"> 1387</span> </div>
|
|
<div class="line"><a name="l01388"></a><span class="lineno"> 1388</span> </div>
|
|
<div class="line"><a name="l01389"></a><span class="lineno"> 1389</span> </div>
|
|
<div class="line"><a name="l01390"></a><span class="lineno"> 1390</span> <span class="comment">/***************************************************************************</span></div>
|
|
<div class="line"><a name="l01391"></a><span class="lineno"> 1391</span> <span class="comment"> * Kmeans subroutine</span></div>
|
|
<div class="line"><a name="l01392"></a><span class="lineno"> 1392</span> <span class="comment"> ***************************************************************************/</span></div>
|
|
<div class="line"><a name="l01393"></a><span class="lineno"> 1393</span> </div>
|
|
<div class="line"><a name="l01394"></a><span class="lineno"> 1394</span> <span class="comment">// a bit above machine epsilon for float16</span></div>
|
|
<div class="line"><a name="l01395"></a><span class="lineno"> 1395</span> </div>
|
|
<div class="line"><a name="l01396"></a><span class="lineno"> 1396</span> <span class="preprocessor">#define EPS (1 / 1024.)</span></div>
|
|
<div class="line"><a name="l01397"></a><span class="lineno"> 1397</span> <span class="preprocessor"></span></div>
|
|
<div class="line"><a name="l01398"></a><span class="lineno"> 1398</span> <span class="comment">/* For k-means, compute centroids given assignment of vectors to centroids */</span></div>
|
|
<div class="line"><a name="l01399"></a><span class="lineno"><a class="line" href="namespacefaiss.html#aa2c6a9e87a64bba8e8014e14f70bde21"> 1399</a></span> <span class="keywordtype">int</span> <a class="code" href="namespacefaiss.html#aa2c6a9e87a64bba8e8014e14f70bde21">km_update_centroids</a> (<span class="keyword">const</span> <span class="keywordtype">float</span> * x,</div>
|
|
<div class="line"><a name="l01400"></a><span class="lineno"> 1400</span>  <span class="keywordtype">float</span> * centroids,</div>
|
|
<div class="line"><a name="l01401"></a><span class="lineno"> 1401</span>  <span class="keywordtype">long</span> * assign,</div>
|
|
<div class="line"><a name="l01402"></a><span class="lineno"> 1402</span>  <span class="keywordtype">size_t</span> d, <span class="keywordtype">size_t</span> k, <span class="keywordtype">size_t</span> n,</div>
|
|
<div class="line"><a name="l01403"></a><span class="lineno"> 1403</span>  <span class="keywordtype">size_t</span> k_frozen)</div>
|
|
<div class="line"><a name="l01404"></a><span class="lineno"> 1404</span> {</div>
|
|
<div class="line"><a name="l01405"></a><span class="lineno"> 1405</span>  k -= k_frozen;</div>
|
|
<div class="line"><a name="l01406"></a><span class="lineno"> 1406</span>  centroids += k_frozen * d;</div>
|
|
<div class="line"><a name="l01407"></a><span class="lineno"> 1407</span> </div>
|
|
<div class="line"><a name="l01408"></a><span class="lineno"> 1408</span>  std::vector<size_t> hassign(k);</div>
|
|
<div class="line"><a name="l01409"></a><span class="lineno"> 1409</span>  memset (centroids, 0, <span class="keyword">sizeof</span>(*centroids) * d * k);</div>
|
|
<div class="line"><a name="l01410"></a><span class="lineno"> 1410</span> </div>
|
|
<div class="line"><a name="l01411"></a><span class="lineno"> 1411</span> <span class="preprocessor">#pragma omp parallel</span></div>
|
|
<div class="line"><a name="l01412"></a><span class="lineno"> 1412</span> <span class="preprocessor"></span> {</div>
|
|
<div class="line"><a name="l01413"></a><span class="lineno"> 1413</span>  <span class="keywordtype">int</span> nt = omp_get_num_threads();</div>
|
|
<div class="line"><a name="l01414"></a><span class="lineno"> 1414</span>  <span class="keywordtype">int</span> rank = omp_get_thread_num();</div>
|
|
<div class="line"><a name="l01415"></a><span class="lineno"> 1415</span>  <span class="comment">// this thread is taking care of centroids c0:c1</span></div>
|
|
<div class="line"><a name="l01416"></a><span class="lineno"> 1416</span>  <span class="keywordtype">size_t</span> c0 = (k * rank) / nt;</div>
|
|
<div class="line"><a name="l01417"></a><span class="lineno"> 1417</span>  <span class="keywordtype">size_t</span> c1 = (k * (rank + 1)) / nt;</div>
|
|
<div class="line"><a name="l01418"></a><span class="lineno"> 1418</span>  <span class="keyword">const</span> <span class="keywordtype">float</span> *xi = x;</div>
|
|
<div class="line"><a name="l01419"></a><span class="lineno"> 1419</span>  <span class="keywordtype">size_t</span> nacc = 0;</div>
|
|
<div class="line"><a name="l01420"></a><span class="lineno"> 1420</span> </div>
|
|
<div class="line"><a name="l01421"></a><span class="lineno"> 1421</span>  <span class="keywordflow">for</span> (<span class="keywordtype">size_t</span> i = 0; i < n; i++) {</div>
|
|
<div class="line"><a name="l01422"></a><span class="lineno"> 1422</span>  <span class="keywordtype">long</span> ci = assign[i];</div>
|
|
<div class="line"><a name="l01423"></a><span class="lineno"> 1423</span>  assert (ci >= 0 && ci < k + k_frozen);</div>
|
|
<div class="line"><a name="l01424"></a><span class="lineno"> 1424</span>  ci -= k_frozen;</div>
|
|
<div class="line"><a name="l01425"></a><span class="lineno"> 1425</span>  <span class="keywordflow">if</span> (ci >= c0 && ci < c1) {</div>
|
|
<div class="line"><a name="l01426"></a><span class="lineno"> 1426</span>  <span class="keywordtype">float</span> * c = centroids + ci * d;</div>
|
|
<div class="line"><a name="l01427"></a><span class="lineno"> 1427</span>  hassign[ci]++;</div>
|
|
<div class="line"><a name="l01428"></a><span class="lineno"> 1428</span>  <span class="keywordflow">for</span> (<span class="keywordtype">size_t</span> j = 0; j < d; j++)</div>
|
|
<div class="line"><a name="l01429"></a><span class="lineno"> 1429</span>  c[j] += xi[j];</div>
|
|
<div class="line"><a name="l01430"></a><span class="lineno"> 1430</span>  nacc++;</div>
|
|
<div class="line"><a name="l01431"></a><span class="lineno"> 1431</span>  }</div>
|
|
<div class="line"><a name="l01432"></a><span class="lineno"> 1432</span>  xi += d;</div>
|
|
<div class="line"><a name="l01433"></a><span class="lineno"> 1433</span>  }</div>
|
|
<div class="line"><a name="l01434"></a><span class="lineno"> 1434</span> </div>
|
|
<div class="line"><a name="l01435"></a><span class="lineno"> 1435</span>  }</div>
|
|
<div class="line"><a name="l01436"></a><span class="lineno"> 1436</span> </div>
|
|
<div class="line"><a name="l01437"></a><span class="lineno"> 1437</span> <span class="preprocessor">#pragma omp parallel for</span></div>
|
|
<div class="line"><a name="l01438"></a><span class="lineno"> 1438</span> <span class="preprocessor"></span> <span class="keywordflow">for</span> (<span class="keywordtype">size_t</span> ci = 0; ci < k; ci++) {</div>
|
|
<div class="line"><a name="l01439"></a><span class="lineno"> 1439</span>  <span class="keywordtype">float</span> * c = centroids + ci * d;</div>
|
|
<div class="line"><a name="l01440"></a><span class="lineno"> 1440</span>  <span class="keywordtype">float</span> ni = (float) hassign[ci];</div>
|
|
<div class="line"><a name="l01441"></a><span class="lineno"> 1441</span>  <span class="keywordflow">if</span> (ni != 0) {</div>
|
|
<div class="line"><a name="l01442"></a><span class="lineno"> 1442</span>  <span class="keywordflow">for</span> (<span class="keywordtype">size_t</span> j = 0; j < d; j++)</div>
|
|
<div class="line"><a name="l01443"></a><span class="lineno"> 1443</span>  c[j] /= ni;</div>
|
|
<div class="line"><a name="l01444"></a><span class="lineno"> 1444</span>  }</div>
|
|
<div class="line"><a name="l01445"></a><span class="lineno"> 1445</span>  }</div>
|
|
<div class="line"><a name="l01446"></a><span class="lineno"> 1446</span> </div>
|
|
<div class="line"><a name="l01447"></a><span class="lineno"> 1447</span>  <span class="comment">/* Take care of void clusters */</span></div>
|
|
<div class="line"><a name="l01448"></a><span class="lineno"> 1448</span>  <span class="keywordtype">size_t</span> nsplit = 0;</div>
|
|
<div class="line"><a name="l01449"></a><span class="lineno"> 1449</span>  <a class="code" href="structfaiss_1_1RandomGenerator.html">RandomGenerator</a> rng (1234);</div>
|
|
<div class="line"><a name="l01450"></a><span class="lineno"> 1450</span>  <span class="keywordflow">for</span> (<span class="keywordtype">size_t</span> ci = 0; ci < k; ci++) {</div>
|
|
<div class="line"><a name="l01451"></a><span class="lineno"> 1451</span>  <span class="keywordflow">if</span> (hassign[ci] == 0) { <span class="comment">/* need to redefine a centroid */</span></div>
|
|
<div class="line"><a name="l01452"></a><span class="lineno"> 1452</span>  <span class="keywordtype">size_t</span> cj;</div>
|
|
<div class="line"><a name="l01453"></a><span class="lineno"> 1453</span>  <span class="keywordflow">for</span> (cj = 0; 1; cj = (cj + 1) % k) {</div>
|
|
<div class="line"><a name="l01454"></a><span class="lineno"> 1454</span>  <span class="comment">/* probability to pick this cluster for split */</span></div>
|
|
<div class="line"><a name="l01455"></a><span class="lineno"> 1455</span>  <span class="keywordtype">float</span> p = (hassign[cj] - 1.0) / (float) (n - k);</div>
|
|
<div class="line"><a name="l01456"></a><span class="lineno"> 1456</span>  <span class="keywordtype">float</span> r = rng.<a class="code" href="structfaiss_1_1RandomGenerator.html#ac82a433d7bfa56d750907ba5cf74aed7">rand_float</a> ();</div>
|
|
<div class="line"><a name="l01457"></a><span class="lineno"> 1457</span>  <span class="keywordflow">if</span> (r < p) {</div>
|
|
<div class="line"><a name="l01458"></a><span class="lineno"> 1458</span>  <span class="keywordflow">break</span>; <span class="comment">/* found our cluster to be split */</span></div>
|
|
<div class="line"><a name="l01459"></a><span class="lineno"> 1459</span>  }</div>
|
|
<div class="line"><a name="l01460"></a><span class="lineno"> 1460</span>  }</div>
|
|
<div class="line"><a name="l01461"></a><span class="lineno"> 1461</span>  memcpy (centroids+ci*d, centroids+cj*d, <span class="keyword">sizeof</span>(*centroids) * d);</div>
|
|
<div class="line"><a name="l01462"></a><span class="lineno"> 1462</span> </div>
|
|
<div class="line"><a name="l01463"></a><span class="lineno"> 1463</span>  <span class="comment">/* small symmetric pertubation. Much better than */</span></div>
|
|
<div class="line"><a name="l01464"></a><span class="lineno"> 1464</span>  <span class="keywordflow">for</span> (<span class="keywordtype">size_t</span> j = 0; j < d; j++) {</div>
|
|
<div class="line"><a name="l01465"></a><span class="lineno"> 1465</span>  <span class="keywordflow">if</span> (j % 2 == 0) {</div>
|
|
<div class="line"><a name="l01466"></a><span class="lineno"> 1466</span>  centroids[ci * d + j] *= 1 + EPS;</div>
|
|
<div class="line"><a name="l01467"></a><span class="lineno"> 1467</span>  centroids[cj * d + j] *= 1 - EPS;</div>
|
|
<div class="line"><a name="l01468"></a><span class="lineno"> 1468</span>  } <span class="keywordflow">else</span> {</div>
|
|
<div class="line"><a name="l01469"></a><span class="lineno"> 1469</span>  centroids[ci * d + j] *= 1 - EPS;</div>
|
|
<div class="line"><a name="l01470"></a><span class="lineno"> 1470</span>  centroids[cj * d + j] *= 1 + EPS;</div>
|
|
<div class="line"><a name="l01471"></a><span class="lineno"> 1471</span>  }</div>
|
|
<div class="line"><a name="l01472"></a><span class="lineno"> 1472</span>  }</div>
|
|
<div class="line"><a name="l01473"></a><span class="lineno"> 1473</span> </div>
|
|
<div class="line"><a name="l01474"></a><span class="lineno"> 1474</span>  <span class="comment">/* assume even split of the cluster */</span></div>
|
|
<div class="line"><a name="l01475"></a><span class="lineno"> 1475</span>  hassign[ci] = hassign[cj] / 2;</div>
|
|
<div class="line"><a name="l01476"></a><span class="lineno"> 1476</span>  hassign[cj] -= hassign[ci];</div>
|
|
<div class="line"><a name="l01477"></a><span class="lineno"> 1477</span>  nsplit++;</div>
|
|
<div class="line"><a name="l01478"></a><span class="lineno"> 1478</span>  }</div>
|
|
<div class="line"><a name="l01479"></a><span class="lineno"> 1479</span>  }</div>
|
|
<div class="line"><a name="l01480"></a><span class="lineno"> 1480</span> </div>
|
|
<div class="line"><a name="l01481"></a><span class="lineno"> 1481</span>  <span class="keywordflow">return</span> nsplit;</div>
|
|
<div class="line"><a name="l01482"></a><span class="lineno"> 1482</span> }</div>
|
|
<div class="line"><a name="l01483"></a><span class="lineno"> 1483</span> </div>
|
|
<div class="line"><a name="l01484"></a><span class="lineno"> 1484</span> <span class="preprocessor">#undef EPS</span></div>
|
|
<div class="line"><a name="l01485"></a><span class="lineno"> 1485</span> <span class="preprocessor"></span></div>
|
|
<div class="line"><a name="l01486"></a><span class="lineno"> 1486</span> </div>
|
|
<div class="line"><a name="l01487"></a><span class="lineno"> 1487</span> </div>
|
|
<div class="line"><a name="l01488"></a><span class="lineno"> 1488</span> <span class="comment">/***************************************************************************</span></div>
|
|
<div class="line"><a name="l01489"></a><span class="lineno"> 1489</span> <span class="comment"> * Result list routines</span></div>
|
|
<div class="line"><a name="l01490"></a><span class="lineno"> 1490</span> <span class="comment"> ***************************************************************************/</span></div>
|
|
<div class="line"><a name="l01491"></a><span class="lineno"> 1491</span> </div>
|
|
<div class="line"><a name="l01492"></a><span class="lineno"> 1492</span> </div>
|
|
<div class="line"><a name="l01493"></a><span class="lineno"><a class="line" href="namespacefaiss.html#ae0ee1b6fbd3d6da0f1a3550a780ca24c"> 1493</a></span> <span class="keywordtype">void</span> <a class="code" href="namespacefaiss.html#ae0ee1b6fbd3d6da0f1a3550a780ca24c">ranklist_handle_ties</a> (<span class="keywordtype">int</span> k, <span class="keywordtype">long</span> *idx, <span class="keyword">const</span> <span class="keywordtype">float</span> *dis)</div>
|
|
<div class="line"><a name="l01494"></a><span class="lineno"> 1494</span> {</div>
|
|
<div class="line"><a name="l01495"></a><span class="lineno"> 1495</span>  <span class="keywordtype">float</span> prev_dis = -1e38;</div>
|
|
<div class="line"><a name="l01496"></a><span class="lineno"> 1496</span>  <span class="keywordtype">int</span> prev_i = -1;</div>
|
|
<div class="line"><a name="l01497"></a><span class="lineno"> 1497</span>  <span class="keywordflow">for</span> (<span class="keywordtype">int</span> i = 0; i < k; i++) {</div>
|
|
<div class="line"><a name="l01498"></a><span class="lineno"> 1498</span>  <span class="keywordflow">if</span> (dis[i] != prev_dis) {</div>
|
|
<div class="line"><a name="l01499"></a><span class="lineno"> 1499</span>  <span class="keywordflow">if</span> (i > prev_i + 1) {</div>
|
|
<div class="line"><a name="l01500"></a><span class="lineno"> 1500</span>  <span class="comment">// sort between prev_i and i - 1</span></div>
|
|
<div class="line"><a name="l01501"></a><span class="lineno"> 1501</span>  std::sort (idx + prev_i, idx + i);</div>
|
|
<div class="line"><a name="l01502"></a><span class="lineno"> 1502</span>  }</div>
|
|
<div class="line"><a name="l01503"></a><span class="lineno"> 1503</span>  prev_i = i;</div>
|
|
<div class="line"><a name="l01504"></a><span class="lineno"> 1504</span>  prev_dis = dis[i];</div>
|
|
<div class="line"><a name="l01505"></a><span class="lineno"> 1505</span>  }</div>
|
|
<div class="line"><a name="l01506"></a><span class="lineno"> 1506</span>  }</div>
|
|
<div class="line"><a name="l01507"></a><span class="lineno"> 1507</span> }</div>
|
|
<div class="line"><a name="l01508"></a><span class="lineno"> 1508</span> </div>
|
|
<div class="line"><a name="l01509"></a><span class="lineno"><a class="line" href="namespacefaiss.html#afb7b33f6892678ba79aaf5e71777837c"> 1509</a></span> <span class="keywordtype">size_t</span> <a class="code" href="namespacefaiss.html#afb7b33f6892678ba79aaf5e71777837c">merge_result_table_with</a> (<span class="keywordtype">size_t</span> n, <span class="keywordtype">size_t</span> k,</div>
|
|
<div class="line"><a name="l01510"></a><span class="lineno"> 1510</span>  <span class="keywordtype">long</span> *I0, <span class="keywordtype">float</span> *D0,</div>
|
|
<div class="line"><a name="l01511"></a><span class="lineno"> 1511</span>  <span class="keyword">const</span> <span class="keywordtype">long</span> *I1, <span class="keyword">const</span> <span class="keywordtype">float</span> *D1,</div>
|
|
<div class="line"><a name="l01512"></a><span class="lineno"> 1512</span>  <span class="keywordtype">bool</span> keep_min,</div>
|
|
<div class="line"><a name="l01513"></a><span class="lineno"> 1513</span>  <span class="keywordtype">long</span> translation)</div>
|
|
<div class="line"><a name="l01514"></a><span class="lineno"> 1514</span> {</div>
|
|
<div class="line"><a name="l01515"></a><span class="lineno"> 1515</span>  <span class="keywordtype">size_t</span> n1 = 0;</div>
|
|
<div class="line"><a name="l01516"></a><span class="lineno"> 1516</span> </div>
|
|
<div class="line"><a name="l01517"></a><span class="lineno"> 1517</span> <span class="preprocessor">#pragma omp parallel reduction(+:n1)</span></div>
|
|
<div class="line"><a name="l01518"></a><span class="lineno"> 1518</span> <span class="preprocessor"></span> {</div>
|
|
<div class="line"><a name="l01519"></a><span class="lineno"> 1519</span>  std::vector<long> tmpI (k);</div>
|
|
<div class="line"><a name="l01520"></a><span class="lineno"> 1520</span>  std::vector<float> tmpD (k);</div>
|
|
<div class="line"><a name="l01521"></a><span class="lineno"> 1521</span> </div>
|
|
<div class="line"><a name="l01522"></a><span class="lineno"> 1522</span> <span class="preprocessor">#pragma omp for</span></div>
|
|
<div class="line"><a name="l01523"></a><span class="lineno"> 1523</span> <span class="preprocessor"></span> <span class="keywordflow">for</span> (<span class="keywordtype">size_t</span> i = 0; i < n; i++) {</div>
|
|
<div class="line"><a name="l01524"></a><span class="lineno"> 1524</span>  <span class="keywordtype">long</span> *lI0 = I0 + i * k;</div>
|
|
<div class="line"><a name="l01525"></a><span class="lineno"> 1525</span>  <span class="keywordtype">float</span> *lD0 = D0 + i * k;</div>
|
|
<div class="line"><a name="l01526"></a><span class="lineno"> 1526</span>  <span class="keyword">const</span> <span class="keywordtype">long</span> *lI1 = I1 + i * k;</div>
|
|
<div class="line"><a name="l01527"></a><span class="lineno"> 1527</span>  <span class="keyword">const</span> <span class="keywordtype">float</span> *lD1 = D1 + i * k;</div>
|
|
<div class="line"><a name="l01528"></a><span class="lineno"> 1528</span>  <span class="keywordtype">size_t</span> r0 = 0;</div>
|
|
<div class="line"><a name="l01529"></a><span class="lineno"> 1529</span>  <span class="keywordtype">size_t</span> r1 = 0;</div>
|
|
<div class="line"><a name="l01530"></a><span class="lineno"> 1530</span> </div>
|
|
<div class="line"><a name="l01531"></a><span class="lineno"> 1531</span>  <span class="keywordflow">if</span> (keep_min) {</div>
|
|
<div class="line"><a name="l01532"></a><span class="lineno"> 1532</span>  <span class="keywordflow">for</span> (<span class="keywordtype">size_t</span> j = 0; j < k; j++) {</div>
|
|
<div class="line"><a name="l01533"></a><span class="lineno"> 1533</span> </div>
|
|
<div class="line"><a name="l01534"></a><span class="lineno"> 1534</span>  <span class="keywordflow">if</span> (lI0[r0] >= 0 && lD0[r0] < lD1[r1]) {</div>
|
|
<div class="line"><a name="l01535"></a><span class="lineno"> 1535</span>  tmpD[j] = lD0[r0];</div>
|
|
<div class="line"><a name="l01536"></a><span class="lineno"> 1536</span>  tmpI[j] = lI0[r0];</div>
|
|
<div class="line"><a name="l01537"></a><span class="lineno"> 1537</span>  r0++;</div>
|
|
<div class="line"><a name="l01538"></a><span class="lineno"> 1538</span>  } <span class="keywordflow">else</span> <span class="keywordflow">if</span> (lD1[r1] >= 0) {</div>
|
|
<div class="line"><a name="l01539"></a><span class="lineno"> 1539</span>  tmpD[j] = lD1[r1];</div>
|
|
<div class="line"><a name="l01540"></a><span class="lineno"> 1540</span>  tmpI[j] = lI1[r1] + translation;</div>
|
|
<div class="line"><a name="l01541"></a><span class="lineno"> 1541</span>  r1++;</div>
|
|
<div class="line"><a name="l01542"></a><span class="lineno"> 1542</span>  } <span class="keywordflow">else</span> { <span class="comment">// both are NaNs</span></div>
|
|
<div class="line"><a name="l01543"></a><span class="lineno"> 1543</span>  tmpD[j] = NAN;</div>
|
|
<div class="line"><a name="l01544"></a><span class="lineno"> 1544</span>  tmpI[j] = -1;</div>
|
|
<div class="line"><a name="l01545"></a><span class="lineno"> 1545</span>  }</div>
|
|
<div class="line"><a name="l01546"></a><span class="lineno"> 1546</span>  }</div>
|
|
<div class="line"><a name="l01547"></a><span class="lineno"> 1547</span>  } <span class="keywordflow">else</span> {</div>
|
|
<div class="line"><a name="l01548"></a><span class="lineno"> 1548</span>  <span class="keywordflow">for</span> (<span class="keywordtype">size_t</span> j = 0; j < k; j++) {</div>
|
|
<div class="line"><a name="l01549"></a><span class="lineno"> 1549</span>  <span class="keywordflow">if</span> (lI0[r0] >= 0 && lD0[r0] > lD1[r1]) {</div>
|
|
<div class="line"><a name="l01550"></a><span class="lineno"> 1550</span>  tmpD[j] = lD0[r0];</div>
|
|
<div class="line"><a name="l01551"></a><span class="lineno"> 1551</span>  tmpI[j] = lI0[r0];</div>
|
|
<div class="line"><a name="l01552"></a><span class="lineno"> 1552</span>  r0++;</div>
|
|
<div class="line"><a name="l01553"></a><span class="lineno"> 1553</span>  } <span class="keywordflow">else</span> <span class="keywordflow">if</span> (lD1[r1] >= 0) {</div>
|
|
<div class="line"><a name="l01554"></a><span class="lineno"> 1554</span>  tmpD[j] = lD1[r1];</div>
|
|
<div class="line"><a name="l01555"></a><span class="lineno"> 1555</span>  tmpI[j] = lI1[r1] + translation;</div>
|
|
<div class="line"><a name="l01556"></a><span class="lineno"> 1556</span>  r1++;</div>
|
|
<div class="line"><a name="l01557"></a><span class="lineno"> 1557</span>  } <span class="keywordflow">else</span> { <span class="comment">// both are NaNs</span></div>
|
|
<div class="line"><a name="l01558"></a><span class="lineno"> 1558</span>  tmpD[j] = NAN;</div>
|
|
<div class="line"><a name="l01559"></a><span class="lineno"> 1559</span>  tmpI[j] = -1;</div>
|
|
<div class="line"><a name="l01560"></a><span class="lineno"> 1560</span>  }</div>
|
|
<div class="line"><a name="l01561"></a><span class="lineno"> 1561</span>  }</div>
|
|
<div class="line"><a name="l01562"></a><span class="lineno"> 1562</span>  }</div>
|
|
<div class="line"><a name="l01563"></a><span class="lineno"> 1563</span>  n1 += r1;</div>
|
|
<div class="line"><a name="l01564"></a><span class="lineno"> 1564</span>  memcpy (lD0, tmpD.data(), <span class="keyword">sizeof</span> (lD0[0]) * k);</div>
|
|
<div class="line"><a name="l01565"></a><span class="lineno"> 1565</span>  memcpy (lI0, tmpI.data(), <span class="keyword">sizeof</span> (lI0[0]) * k);</div>
|
|
<div class="line"><a name="l01566"></a><span class="lineno"> 1566</span>  }</div>
|
|
<div class="line"><a name="l01567"></a><span class="lineno"> 1567</span>  }</div>
|
|
<div class="line"><a name="l01568"></a><span class="lineno"> 1568</span> </div>
|
|
<div class="line"><a name="l01569"></a><span class="lineno"> 1569</span>  <span class="keywordflow">return</span> n1;</div>
|
|
<div class="line"><a name="l01570"></a><span class="lineno"> 1570</span> }</div>
|
|
<div class="line"><a name="l01571"></a><span class="lineno"> 1571</span> </div>
|
|
<div class="line"><a name="l01572"></a><span class="lineno"> 1572</span> </div>
|
|
<div class="line"><a name="l01573"></a><span class="lineno"> 1573</span> </div>
|
|
<div class="line"><a name="l01574"></a><span class="lineno"><a class="line" href="namespacefaiss.html#a8dbc652ba48d41f126b8815004899448"> 1574</a></span> <span class="keywordtype">size_t</span> <a class="code" href="namespacefaiss.html#a8dbc652ba48d41f126b8815004899448">ranklist_intersection_size</a> (<span class="keywordtype">size_t</span> k1, <span class="keyword">const</span> <span class="keywordtype">long</span> *v1,</div>
|
|
<div class="line"><a name="l01575"></a><span class="lineno"> 1575</span>  <span class="keywordtype">size_t</span> k2, <span class="keyword">const</span> <span class="keywordtype">long</span> *v2_in)</div>
|
|
<div class="line"><a name="l01576"></a><span class="lineno"> 1576</span> {</div>
|
|
<div class="line"><a name="l01577"></a><span class="lineno"> 1577</span>  <span class="keywordflow">if</span> (k2 > k1) <span class="keywordflow">return</span> <a class="code" href="namespacefaiss.html#a8dbc652ba48d41f126b8815004899448">ranklist_intersection_size</a> (k2, v2_in, k1, v1);</div>
|
|
<div class="line"><a name="l01578"></a><span class="lineno"> 1578</span>  <span class="keywordtype">long</span> *v2 = <span class="keyword">new</span> <span class="keywordtype">long</span> [k2];</div>
|
|
<div class="line"><a name="l01579"></a><span class="lineno"> 1579</span>  memcpy (v2, v2_in, <span class="keyword">sizeof</span> (<span class="keywordtype">long</span>) * k2);</div>
|
|
<div class="line"><a name="l01580"></a><span class="lineno"> 1580</span>  std::sort (v2, v2 + k2);</div>
|
|
<div class="line"><a name="l01581"></a><span class="lineno"> 1581</span>  { <span class="comment">// de-dup v2</span></div>
|
|
<div class="line"><a name="l01582"></a><span class="lineno"> 1582</span>  <span class="keywordtype">long</span> prev = -1;</div>
|
|
<div class="line"><a name="l01583"></a><span class="lineno"> 1583</span>  <span class="keywordtype">size_t</span> wp = 0;</div>
|
|
<div class="line"><a name="l01584"></a><span class="lineno"> 1584</span>  <span class="keywordflow">for</span> (<span class="keywordtype">size_t</span> i = 0; i < k2; i++) {</div>
|
|
<div class="line"><a name="l01585"></a><span class="lineno"> 1585</span>  <span class="keywordflow">if</span> (v2 [i] != prev) {</div>
|
|
<div class="line"><a name="l01586"></a><span class="lineno"> 1586</span>  v2[wp++] = prev = v2 [i];</div>
|
|
<div class="line"><a name="l01587"></a><span class="lineno"> 1587</span>  }</div>
|
|
<div class="line"><a name="l01588"></a><span class="lineno"> 1588</span>  }</div>
|
|
<div class="line"><a name="l01589"></a><span class="lineno"> 1589</span>  k2 = wp;</div>
|
|
<div class="line"><a name="l01590"></a><span class="lineno"> 1590</span>  }</div>
|
|
<div class="line"><a name="l01591"></a><span class="lineno"> 1591</span>  <span class="keyword">const</span> <span class="keywordtype">long</span> seen_flag = 1L << 60;</div>
|
|
<div class="line"><a name="l01592"></a><span class="lineno"> 1592</span>  <span class="keywordtype">size_t</span> count = 0;</div>
|
|
<div class="line"><a name="l01593"></a><span class="lineno"> 1593</span>  <span class="keywordflow">for</span> (<span class="keywordtype">size_t</span> i = 0; i < k1; i++) {</div>
|
|
<div class="line"><a name="l01594"></a><span class="lineno"> 1594</span>  <span class="keywordtype">long</span> q = v1 [i];</div>
|
|
<div class="line"><a name="l01595"></a><span class="lineno"> 1595</span>  <span class="keywordtype">size_t</span> i0 = 0, i1 = k2;</div>
|
|
<div class="line"><a name="l01596"></a><span class="lineno"> 1596</span>  <span class="keywordflow">while</span> (i0 + 1 < i1) {</div>
|
|
<div class="line"><a name="l01597"></a><span class="lineno"> 1597</span>  <span class="keywordtype">size_t</span> imed = (i1 + i0) / 2;</div>
|
|
<div class="line"><a name="l01598"></a><span class="lineno"> 1598</span>  <span class="keywordtype">long</span> piv = v2 [imed] & ~seen_flag;</div>
|
|
<div class="line"><a name="l01599"></a><span class="lineno"> 1599</span>  <span class="keywordflow">if</span> (piv <= q) i0 = imed;</div>
|
|
<div class="line"><a name="l01600"></a><span class="lineno"> 1600</span>  <span class="keywordflow">else</span> i1 = imed;</div>
|
|
<div class="line"><a name="l01601"></a><span class="lineno"> 1601</span>  }</div>
|
|
<div class="line"><a name="l01602"></a><span class="lineno"> 1602</span>  <span class="keywordflow">if</span> (v2 [i0] == q) {</div>
|
|
<div class="line"><a name="l01603"></a><span class="lineno"> 1603</span>  count++;</div>
|
|
<div class="line"><a name="l01604"></a><span class="lineno"> 1604</span>  v2 [i0] |= seen_flag;</div>
|
|
<div class="line"><a name="l01605"></a><span class="lineno"> 1605</span>  }</div>
|
|
<div class="line"><a name="l01606"></a><span class="lineno"> 1606</span>  }</div>
|
|
<div class="line"><a name="l01607"></a><span class="lineno"> 1607</span>  <span class="keyword">delete</span> [] v2;</div>
|
|
<div class="line"><a name="l01608"></a><span class="lineno"> 1608</span> </div>
|
|
<div class="line"><a name="l01609"></a><span class="lineno"> 1609</span>  <span class="keywordflow">return</span> count;</div>
|
|
<div class="line"><a name="l01610"></a><span class="lineno"> 1610</span> }</div>
|
|
<div class="line"><a name="l01611"></a><span class="lineno"> 1611</span> </div>
|
|
<div class="line"><a name="l01612"></a><span class="lineno"><a class="line" href="namespacefaiss.html#a94c1d99ac39d22e362aa27ce7c7ff714"> 1612</a></span> <span class="keywordtype">double</span> <a class="code" href="namespacefaiss.html#a94c1d99ac39d22e362aa27ce7c7ff714">imbalance_factor</a> (<span class="keywordtype">int</span> k, <span class="keyword">const</span> <span class="keywordtype">int</span> *hist) {</div>
|
|
<div class="line"><a name="l01613"></a><span class="lineno"> 1613</span>  <span class="keywordtype">double</span> tot = 0, uf = 0;</div>
|
|
<div class="line"><a name="l01614"></a><span class="lineno"> 1614</span> </div>
|
|
<div class="line"><a name="l01615"></a><span class="lineno"> 1615</span>  <span class="keywordflow">for</span> (<span class="keywordtype">int</span> i = 0 ; i < k ; i++) {</div>
|
|
<div class="line"><a name="l01616"></a><span class="lineno"> 1616</span>  tot += hist[i];</div>
|
|
<div class="line"><a name="l01617"></a><span class="lineno"> 1617</span>  uf += hist[i] * (double) hist[i];</div>
|
|
<div class="line"><a name="l01618"></a><span class="lineno"> 1618</span>  }</div>
|
|
<div class="line"><a name="l01619"></a><span class="lineno"> 1619</span>  uf = uf * k / (tot * tot);</div>
|
|
<div class="line"><a name="l01620"></a><span class="lineno"> 1620</span> </div>
|
|
<div class="line"><a name="l01621"></a><span class="lineno"> 1621</span>  <span class="keywordflow">return</span> uf;</div>
|
|
<div class="line"><a name="l01622"></a><span class="lineno"> 1622</span> }</div>
|
|
<div class="line"><a name="l01623"></a><span class="lineno"> 1623</span> </div>
|
|
<div class="line"><a name="l01624"></a><span class="lineno"> 1624</span> </div>
|
|
<div class="line"><a name="l01625"></a><span class="lineno"><a class="line" href="namespacefaiss.html#af762526714e6138009c72aee98657538"> 1625</a></span> <span class="keywordtype">double</span> <a class="code" href="namespacefaiss.html#a94c1d99ac39d22e362aa27ce7c7ff714">imbalance_factor</a> (<span class="keywordtype">int</span> n, <span class="keywordtype">int</span> k, <span class="keyword">const</span> <span class="keywordtype">long</span> *assign) {</div>
|
|
<div class="line"><a name="l01626"></a><span class="lineno"> 1626</span>  std::vector<int> hist(k, 0);</div>
|
|
<div class="line"><a name="l01627"></a><span class="lineno"> 1627</span>  <span class="keywordflow">for</span> (<span class="keywordtype">int</span> i = 0; i < n; i++) {</div>
|
|
<div class="line"><a name="l01628"></a><span class="lineno"> 1628</span>  hist[assign[i]]++;</div>
|
|
<div class="line"><a name="l01629"></a><span class="lineno"> 1629</span>  }</div>
|
|
<div class="line"><a name="l01630"></a><span class="lineno"> 1630</span> </div>
|
|
<div class="line"><a name="l01631"></a><span class="lineno"> 1631</span>  <span class="keywordflow">return</span> <a class="code" href="namespacefaiss.html#a94c1d99ac39d22e362aa27ce7c7ff714">imbalance_factor</a> (k, hist.data());</div>
|
|
<div class="line"><a name="l01632"></a><span class="lineno"> 1632</span> }</div>
|
|
<div class="line"><a name="l01633"></a><span class="lineno"> 1633</span> </div>
|
|
<div class="line"><a name="l01634"></a><span class="lineno"> 1634</span> </div>
|
|
<div class="line"><a name="l01635"></a><span class="lineno"> 1635</span> </div>
|
|
<div class="line"><a name="l01636"></a><span class="lineno"><a class="line" href="namespacefaiss.html#a7bea462108bee98d8d5859b51ec4db8e"> 1636</a></span> <span class="keywordtype">int</span> <a class="code" href="namespacefaiss.html#a7bea462108bee98d8d5859b51ec4db8e">ivec_hist</a> (<span class="keywordtype">size_t</span> n, <span class="keyword">const</span> <span class="keywordtype">int</span> * v, <span class="keywordtype">int</span> vmax, <span class="keywordtype">int</span> *hist) {</div>
|
|
<div class="line"><a name="l01637"></a><span class="lineno"> 1637</span>  memset (hist, 0, <span class="keyword">sizeof</span>(hist[0]) * vmax);</div>
|
|
<div class="line"><a name="l01638"></a><span class="lineno"> 1638</span>  <span class="keywordtype">int</span> nout = 0;</div>
|
|
<div class="line"><a name="l01639"></a><span class="lineno"> 1639</span>  <span class="keywordflow">while</span> (n--) {</div>
|
|
<div class="line"><a name="l01640"></a><span class="lineno"> 1640</span>  <span class="keywordflow">if</span> (v[n] < 0 || v[n] >= vmax) nout++;</div>
|
|
<div class="line"><a name="l01641"></a><span class="lineno"> 1641</span>  <span class="keywordflow">else</span> hist[v[n]]++;</div>
|
|
<div class="line"><a name="l01642"></a><span class="lineno"> 1642</span>  }</div>
|
|
<div class="line"><a name="l01643"></a><span class="lineno"> 1643</span>  <span class="keywordflow">return</span> nout;</div>
|
|
<div class="line"><a name="l01644"></a><span class="lineno"> 1644</span> }</div>
|
|
<div class="line"><a name="l01645"></a><span class="lineno"> 1645</span> </div>
|
|
<div class="line"><a name="l01646"></a><span class="lineno"> 1646</span> </div>
|
|
<div class="line"><a name="l01647"></a><span class="lineno"><a class="line" href="namespacefaiss.html#a154a47857ed321b9db91122770a16e09"> 1647</a></span> <span class="keywordtype">void</span> <a class="code" href="namespacefaiss.html#a154a47857ed321b9db91122770a16e09">bincode_hist</a>(<span class="keywordtype">size_t</span> n, <span class="keywordtype">size_t</span> nbits, <span class="keyword">const</span> uint8_t *codes, <span class="keywordtype">int</span> *hist)</div>
|
|
<div class="line"><a name="l01648"></a><span class="lineno"> 1648</span> {</div>
|
|
<div class="line"><a name="l01649"></a><span class="lineno"> 1649</span>  FAISS_THROW_IF_NOT (nbits % 8 == 0);</div>
|
|
<div class="line"><a name="l01650"></a><span class="lineno"> 1650</span>  <span class="keywordtype">size_t</span> d = nbits / 8;</div>
|
|
<div class="line"><a name="l01651"></a><span class="lineno"> 1651</span>  std::vector<int> accu(d * 256);</div>
|
|
<div class="line"><a name="l01652"></a><span class="lineno"> 1652</span>  <span class="keyword">const</span> uint8_t *c = codes;</div>
|
|
<div class="line"><a name="l01653"></a><span class="lineno"> 1653</span>  <span class="keywordflow">for</span> (<span class="keywordtype">size_t</span> i = 0; i < n; i++)</div>
|
|
<div class="line"><a name="l01654"></a><span class="lineno"> 1654</span>  <span class="keywordflow">for</span>(<span class="keywordtype">int</span> j = 0; j < d; j++)</div>
|
|
<div class="line"><a name="l01655"></a><span class="lineno"> 1655</span>  accu[j * 256 + *c++]++;</div>
|
|
<div class="line"><a name="l01656"></a><span class="lineno"> 1656</span>  memset (hist, 0, <span class="keyword">sizeof</span>(*hist) * nbits);</div>
|
|
<div class="line"><a name="l01657"></a><span class="lineno"> 1657</span>  <span class="keywordflow">for</span> (<span class="keywordtype">int</span> i = 0; i < d; i++) {</div>
|
|
<div class="line"><a name="l01658"></a><span class="lineno"> 1658</span>  <span class="keyword">const</span> <span class="keywordtype">int</span> *ai = accu.data() + i * 256;</div>
|
|
<div class="line"><a name="l01659"></a><span class="lineno"> 1659</span>  <span class="keywordtype">int</span> * hi = hist + i * 8;</div>
|
|
<div class="line"><a name="l01660"></a><span class="lineno"> 1660</span>  <span class="keywordflow">for</span> (<span class="keywordtype">int</span> j = 0; j < 256; j++)</div>
|
|
<div class="line"><a name="l01661"></a><span class="lineno"> 1661</span>  <span class="keywordflow">for</span> (<span class="keywordtype">int</span> k = 0; k < 8; k++)</div>
|
|
<div class="line"><a name="l01662"></a><span class="lineno"> 1662</span>  <span class="keywordflow">if</span> ((j >> k) & 1)</div>
|
|
<div class="line"><a name="l01663"></a><span class="lineno"> 1663</span>  hi[k] += ai[j];</div>
|
|
<div class="line"><a name="l01664"></a><span class="lineno"> 1664</span>  }</div>
|
|
<div class="line"><a name="l01665"></a><span class="lineno"> 1665</span> </div>
|
|
<div class="line"><a name="l01666"></a><span class="lineno"> 1666</span> }</div>
|
|
<div class="line"><a name="l01667"></a><span class="lineno"> 1667</span> </div>
|
|
<div class="line"><a name="l01668"></a><span class="lineno"> 1668</span> </div>
|
|
<div class="line"><a name="l01669"></a><span class="lineno"> 1669</span> </div>
|
|
<div class="line"><a name="l01670"></a><span class="lineno"><a class="line" href="namespacefaiss.html#a4369329c9dbdfe23e3f35d09ba7b5c6e"> 1670</a></span> <span class="keywordtype">size_t</span> <a class="code" href="namespacefaiss.html#a4369329c9dbdfe23e3f35d09ba7b5c6e">ivec_checksum</a> (<span class="keywordtype">size_t</span> n, <span class="keyword">const</span> <span class="keywordtype">int</span> *a)</div>
|
|
<div class="line"><a name="l01671"></a><span class="lineno"> 1671</span> {</div>
|
|
<div class="line"><a name="l01672"></a><span class="lineno"> 1672</span>  <span class="keywordtype">size_t</span> cs = 112909;</div>
|
|
<div class="line"><a name="l01673"></a><span class="lineno"> 1673</span>  <span class="keywordflow">while</span> (n--) cs = cs * 65713 + a[n] * 1686049;</div>
|
|
<div class="line"><a name="l01674"></a><span class="lineno"> 1674</span>  <span class="keywordflow">return</span> cs;</div>
|
|
<div class="line"><a name="l01675"></a><span class="lineno"> 1675</span> }</div>
|
|
<div class="line"><a name="l01676"></a><span class="lineno"> 1676</span> </div>
|
|
<div class="line"><a name="l01677"></a><span class="lineno"> 1677</span> </div>
|
|
<div class="line"><a name="l01678"></a><span class="lineno"> 1678</span> <span class="keyword">namespace </span>{</div>
|
|
<div class="line"><a name="l01679"></a><span class="lineno"> 1679</span>  <span class="keyword">struct </span>ArgsortComparator {</div>
|
|
<div class="line"><a name="l01680"></a><span class="lineno"> 1680</span>  <span class="keyword">const</span> <span class="keywordtype">float</span> *vals;</div>
|
|
<div class="line"><a name="l01681"></a><span class="lineno"> 1681</span>  <span class="keywordtype">bool</span> operator() (<span class="keyword">const</span> <span class="keywordtype">size_t</span> a, <span class="keyword">const</span> <span class="keywordtype">size_t</span> b)<span class="keyword"> const </span>{</div>
|
|
<div class="line"><a name="l01682"></a><span class="lineno"> 1682</span>  <span class="keywordflow">return</span> vals[a] < vals[b];</div>
|
|
<div class="line"><a name="l01683"></a><span class="lineno"> 1683</span>  }</div>
|
|
<div class="line"><a name="l01684"></a><span class="lineno"> 1684</span>  };</div>
|
|
<div class="line"><a name="l01685"></a><span class="lineno"> 1685</span> </div>
|
|
<div class="line"><a name="l01686"></a><span class="lineno"> 1686</span>  <span class="keyword">struct </span>SegmentS {</div>
|
|
<div class="line"><a name="l01687"></a><span class="lineno"> 1687</span>  <span class="keywordtype">size_t</span> i0; <span class="comment">// begin pointer in the permutation array</span></div>
|
|
<div class="line"><a name="l01688"></a><span class="lineno"> 1688</span>  <span class="keywordtype">size_t</span> i1; <span class="comment">// end</span></div>
|
|
<div class="line"><a name="l01689"></a><span class="lineno"> 1689</span>  <span class="keywordtype">size_t</span> len()<span class="keyword"> const </span>{</div>
|
|
<div class="line"><a name="l01690"></a><span class="lineno"> 1690</span>  <span class="keywordflow">return</span> i1 - i0;</div>
|
|
<div class="line"><a name="l01691"></a><span class="lineno"> 1691</span>  }</div>
|
|
<div class="line"><a name="l01692"></a><span class="lineno"> 1692</span>  };</div>
|
|
<div class="line"><a name="l01693"></a><span class="lineno"> 1693</span> </div>
|
|
<div class="line"><a name="l01694"></a><span class="lineno"> 1694</span>  <span class="comment">// see https://en.wikipedia.org/wiki/Merge_algorithm#Parallel_merge</span></div>
|
|
<div class="line"><a name="l01695"></a><span class="lineno"> 1695</span>  <span class="comment">// extended to > 1 merge thread</span></div>
|
|
<div class="line"><a name="l01696"></a><span class="lineno"> 1696</span> </div>
|
|
<div class="line"><a name="l01697"></a><span class="lineno"> 1697</span>  <span class="comment">// merges 2 ranges that should be consecutive on the source into</span></div>
|
|
<div class="line"><a name="l01698"></a><span class="lineno"> 1698</span>  <span class="comment">// the union of the two on the destination</span></div>
|
|
<div class="line"><a name="l01699"></a><span class="lineno"> 1699</span>  <span class="keyword">template</span><<span class="keyword">typename</span> T></div>
|
|
<div class="line"><a name="l01700"></a><span class="lineno"> 1700</span>  <span class="keywordtype">void</span> parallel_merge (<span class="keyword">const</span> T *src, T *dst,</div>
|
|
<div class="line"><a name="l01701"></a><span class="lineno"> 1701</span>  SegmentS &s1, SegmentS & s2, <span class="keywordtype">int</span> nt,</div>
|
|
<div class="line"><a name="l01702"></a><span class="lineno"> 1702</span>  <span class="keyword">const</span> ArgsortComparator & comp) {</div>
|
|
<div class="line"><a name="l01703"></a><span class="lineno"> 1703</span>  <span class="keywordflow">if</span> (s2.len() > s1.len()) { <span class="comment">// make sure that s1 larger than s2</span></div>
|
|
<div class="line"><a name="l01704"></a><span class="lineno"> 1704</span>  std::swap(s1, s2);</div>
|
|
<div class="line"><a name="l01705"></a><span class="lineno"> 1705</span>  }</div>
|
|
<div class="line"><a name="l01706"></a><span class="lineno"> 1706</span> </div>
|
|
<div class="line"><a name="l01707"></a><span class="lineno"> 1707</span>  <span class="comment">// compute sub-ranges for each thread</span></div>
|
|
<div class="line"><a name="l01708"></a><span class="lineno"> 1708</span>  SegmentS s1s[nt], s2s[nt], sws[nt];</div>
|
|
<div class="line"><a name="l01709"></a><span class="lineno"> 1709</span>  s2s[0].i0 = s2.i0;</div>
|
|
<div class="line"><a name="l01710"></a><span class="lineno"> 1710</span>  s2s[nt - 1].i1 = s2.i1;</div>
|
|
<div class="line"><a name="l01711"></a><span class="lineno"> 1711</span> </div>
|
|
<div class="line"><a name="l01712"></a><span class="lineno"> 1712</span>  <span class="comment">// not sure parallel actually helps here</span></div>
|
|
<div class="line"><a name="l01713"></a><span class="lineno"> 1713</span> <span class="preprocessor">#pragma omp parallel for num_threads(nt)</span></div>
|
|
<div class="line"><a name="l01714"></a><span class="lineno"> 1714</span> <span class="preprocessor"></span> <span class="keywordflow">for</span> (<span class="keywordtype">int</span> t = 0; t < nt; t++) {</div>
|
|
<div class="line"><a name="l01715"></a><span class="lineno"> 1715</span>  s1s[t].i0 = s1.i0 + s1.len() * t / nt;</div>
|
|
<div class="line"><a name="l01716"></a><span class="lineno"> 1716</span>  s1s[t].i1 = s1.i0 + s1.len() * (t + 1) / nt;</div>
|
|
<div class="line"><a name="l01717"></a><span class="lineno"> 1717</span> </div>
|
|
<div class="line"><a name="l01718"></a><span class="lineno"> 1718</span>  <span class="keywordflow">if</span> (t + 1 < nt) {</div>
|
|
<div class="line"><a name="l01719"></a><span class="lineno"> 1719</span>  T pivot = src[s1s[t].i1];</div>
|
|
<div class="line"><a name="l01720"></a><span class="lineno"> 1720</span>  <span class="keywordtype">size_t</span> i0 = s2.i0, i1 = s2.i1;</div>
|
|
<div class="line"><a name="l01721"></a><span class="lineno"> 1721</span>  <span class="keywordflow">while</span> (i0 + 1 < i1) {</div>
|
|
<div class="line"><a name="l01722"></a><span class="lineno"> 1722</span>  <span class="keywordtype">size_t</span> imed = (i1 + i0) / 2;</div>
|
|
<div class="line"><a name="l01723"></a><span class="lineno"> 1723</span>  <span class="keywordflow">if</span> (comp (pivot, src[imed])) {i1 = imed; }</div>
|
|
<div class="line"><a name="l01724"></a><span class="lineno"> 1724</span>  <span class="keywordflow">else</span> {i0 = imed; }</div>
|
|
<div class="line"><a name="l01725"></a><span class="lineno"> 1725</span>  }</div>
|
|
<div class="line"><a name="l01726"></a><span class="lineno"> 1726</span>  s2s[t].i1 = s2s[t + 1].i0 = i1;</div>
|
|
<div class="line"><a name="l01727"></a><span class="lineno"> 1727</span>  }</div>
|
|
<div class="line"><a name="l01728"></a><span class="lineno"> 1728</span>  }</div>
|
|
<div class="line"><a name="l01729"></a><span class="lineno"> 1729</span>  s1.i0 = std::min(s1.i0, s2.i0);</div>
|
|
<div class="line"><a name="l01730"></a><span class="lineno"> 1730</span>  s1.i1 = std::max(s1.i1, s2.i1);</div>
|
|
<div class="line"><a name="l01731"></a><span class="lineno"> 1731</span>  s2 = s1;</div>
|
|
<div class="line"><a name="l01732"></a><span class="lineno"> 1732</span>  sws[0].i0 = s1.i0;</div>
|
|
<div class="line"><a name="l01733"></a><span class="lineno"> 1733</span>  <span class="keywordflow">for</span> (<span class="keywordtype">int</span> t = 0; t < nt; t++) {</div>
|
|
<div class="line"><a name="l01734"></a><span class="lineno"> 1734</span>  sws[t].i1 = sws[t].i0 + s1s[t].len() + s2s[t].len();</div>
|
|
<div class="line"><a name="l01735"></a><span class="lineno"> 1735</span>  <span class="keywordflow">if</span> (t + 1 < nt) {</div>
|
|
<div class="line"><a name="l01736"></a><span class="lineno"> 1736</span>  sws[t + 1].i0 = sws[t].i1;</div>
|
|
<div class="line"><a name="l01737"></a><span class="lineno"> 1737</span>  }</div>
|
|
<div class="line"><a name="l01738"></a><span class="lineno"> 1738</span>  }</div>
|
|
<div class="line"><a name="l01739"></a><span class="lineno"> 1739</span>  assert(sws[nt - 1].i1 == s1.i1);</div>
|
|
<div class="line"><a name="l01740"></a><span class="lineno"> 1740</span> </div>
|
|
<div class="line"><a name="l01741"></a><span class="lineno"> 1741</span>  <span class="comment">// do the actual merging</span></div>
|
|
<div class="line"><a name="l01742"></a><span class="lineno"> 1742</span> <span class="preprocessor">#pragma omp parallel for num_threads(nt)</span></div>
|
|
<div class="line"><a name="l01743"></a><span class="lineno"> 1743</span> <span class="preprocessor"></span> <span class="keywordflow">for</span> (<span class="keywordtype">int</span> t = 0; t < nt; t++) {</div>
|
|
<div class="line"><a name="l01744"></a><span class="lineno"> 1744</span>  SegmentS sw = sws[t];</div>
|
|
<div class="line"><a name="l01745"></a><span class="lineno"> 1745</span>  SegmentS s1t = s1s[t];</div>
|
|
<div class="line"><a name="l01746"></a><span class="lineno"> 1746</span>  SegmentS s2t = s2s[t];</div>
|
|
<div class="line"><a name="l01747"></a><span class="lineno"> 1747</span>  <span class="keywordflow">if</span> (s1t.i0 < s1t.i1 && s2t.i0 < s2t.i1) {</div>
|
|
<div class="line"><a name="l01748"></a><span class="lineno"> 1748</span>  <span class="keywordflow">for</span> (;;) {</div>
|
|
<div class="line"><a name="l01749"></a><span class="lineno"> 1749</span>  <span class="comment">// assert (sw.len() == s1t.len() + s2t.len());</span></div>
|
|
<div class="line"><a name="l01750"></a><span class="lineno"> 1750</span>  <span class="keywordflow">if</span> (comp(src[s1t.i0], src[s2t.i0])) {</div>
|
|
<div class="line"><a name="l01751"></a><span class="lineno"> 1751</span>  dst[sw.i0++] = src[s1t.i0++];</div>
|
|
<div class="line"><a name="l01752"></a><span class="lineno"> 1752</span>  <span class="keywordflow">if</span> (s1t.i0 == s1t.i1) <span class="keywordflow">break</span>;</div>
|
|
<div class="line"><a name="l01753"></a><span class="lineno"> 1753</span>  } <span class="keywordflow">else</span> {</div>
|
|
<div class="line"><a name="l01754"></a><span class="lineno"> 1754</span>  dst[sw.i0++] = src[s2t.i0++];</div>
|
|
<div class="line"><a name="l01755"></a><span class="lineno"> 1755</span>  <span class="keywordflow">if</span> (s2t.i0 == s2t.i1) <span class="keywordflow">break</span>;</div>
|
|
<div class="line"><a name="l01756"></a><span class="lineno"> 1756</span>  }</div>
|
|
<div class="line"><a name="l01757"></a><span class="lineno"> 1757</span>  }</div>
|
|
<div class="line"><a name="l01758"></a><span class="lineno"> 1758</span>  }</div>
|
|
<div class="line"><a name="l01759"></a><span class="lineno"> 1759</span>  <span class="keywordflow">if</span> (s1t.len() > 0) {</div>
|
|
<div class="line"><a name="l01760"></a><span class="lineno"> 1760</span>  assert(s1t.len() == sw.len());</div>
|
|
<div class="line"><a name="l01761"></a><span class="lineno"> 1761</span>  memcpy(dst + sw.i0, src + s1t.i0, s1t.len() * <span class="keyword">sizeof</span>(dst[0]));</div>
|
|
<div class="line"><a name="l01762"></a><span class="lineno"> 1762</span>  } <span class="keywordflow">else</span> <span class="keywordflow">if</span> (s2t.len() > 0) {</div>
|
|
<div class="line"><a name="l01763"></a><span class="lineno"> 1763</span>  assert(s2t.len() == sw.len());</div>
|
|
<div class="line"><a name="l01764"></a><span class="lineno"> 1764</span>  memcpy(dst + sw.i0, src + s2t.i0, s2t.len() * <span class="keyword">sizeof</span>(dst[0]));</div>
|
|
<div class="line"><a name="l01765"></a><span class="lineno"> 1765</span>  }</div>
|
|
<div class="line"><a name="l01766"></a><span class="lineno"> 1766</span>  }</div>
|
|
<div class="line"><a name="l01767"></a><span class="lineno"> 1767</span>  }</div>
|
|
<div class="line"><a name="l01768"></a><span class="lineno"> 1768</span> </div>
|
|
<div class="line"><a name="l01769"></a><span class="lineno"> 1769</span> };</div>
|
|
<div class="line"><a name="l01770"></a><span class="lineno"> 1770</span> </div>
|
|
<div class="line"><a name="l01771"></a><span class="lineno"> 1771</span> <span class="keywordtype">void</span> fvec_argsort (<span class="keywordtype">size_t</span> n, <span class="keyword">const</span> <span class="keywordtype">float</span> *vals,</div>
|
|
<div class="line"><a name="l01772"></a><span class="lineno"> 1772</span>  <span class="keywordtype">size_t</span> *perm)</div>
|
|
<div class="line"><a name="l01773"></a><span class="lineno"> 1773</span> {</div>
|
|
<div class="line"><a name="l01774"></a><span class="lineno"> 1774</span>  <span class="keywordflow">for</span> (<span class="keywordtype">size_t</span> i = 0; i < n; i++) perm[i] = i;</div>
|
|
<div class="line"><a name="l01775"></a><span class="lineno"> 1775</span>  ArgsortComparator comp = {vals};</div>
|
|
<div class="line"><a name="l01776"></a><span class="lineno"> 1776</span>  std::sort (perm, perm + n, comp);</div>
|
|
<div class="line"><a name="l01777"></a><span class="lineno"> 1777</span> }</div>
|
|
<div class="line"><a name="l01778"></a><span class="lineno"> 1778</span> </div>
|
|
<div class="line"><a name="l01779"></a><span class="lineno"> 1779</span> <span class="keywordtype">void</span> fvec_argsort_parallel (<span class="keywordtype">size_t</span> n, <span class="keyword">const</span> <span class="keywordtype">float</span> *vals,</div>
|
|
<div class="line"><a name="l01780"></a><span class="lineno"> 1780</span>  <span class="keywordtype">size_t</span> *perm)</div>
|
|
<div class="line"><a name="l01781"></a><span class="lineno"> 1781</span> {</div>
|
|
<div class="line"><a name="l01782"></a><span class="lineno"> 1782</span>  <span class="keywordtype">size_t</span> * perm2 = <span class="keyword">new</span> <span class="keywordtype">size_t</span>[n];</div>
|
|
<div class="line"><a name="l01783"></a><span class="lineno"> 1783</span>  <span class="comment">// 2 result tables, during merging, flip between them</span></div>
|
|
<div class="line"><a name="l01784"></a><span class="lineno"> 1784</span>  <span class="keywordtype">size_t</span> *permB = perm2, *permA = perm;</div>
|
|
<div class="line"><a name="l01785"></a><span class="lineno"> 1785</span> </div>
|
|
<div class="line"><a name="l01786"></a><span class="lineno"> 1786</span>  <span class="keywordtype">int</span> nt = omp_get_max_threads();</div>
|
|
<div class="line"><a name="l01787"></a><span class="lineno"> 1787</span>  { <span class="comment">// prepare correct permutation so that the result ends in perm</span></div>
|
|
<div class="line"><a name="l01788"></a><span class="lineno"> 1788</span>  <span class="comment">// at final iteration</span></div>
|
|
<div class="line"><a name="l01789"></a><span class="lineno"> 1789</span>  <span class="keywordtype">int</span> nseg = nt;</div>
|
|
<div class="line"><a name="l01790"></a><span class="lineno"> 1790</span>  <span class="keywordflow">while</span> (nseg > 1) {</div>
|
|
<div class="line"><a name="l01791"></a><span class="lineno"> 1791</span>  nseg = (nseg + 1) / 2;</div>
|
|
<div class="line"><a name="l01792"></a><span class="lineno"> 1792</span>  std::swap (permA, permB);</div>
|
|
<div class="line"><a name="l01793"></a><span class="lineno"> 1793</span>  }</div>
|
|
<div class="line"><a name="l01794"></a><span class="lineno"> 1794</span>  }</div>
|
|
<div class="line"><a name="l01795"></a><span class="lineno"> 1795</span> </div>
|
|
<div class="line"><a name="l01796"></a><span class="lineno"> 1796</span> <span class="preprocessor">#pragma omp parallel</span></div>
|
|
<div class="line"><a name="l01797"></a><span class="lineno"> 1797</span> <span class="preprocessor"></span> <span class="keywordflow">for</span> (<span class="keywordtype">size_t</span> i = 0; i < n; i++) permA[i] = i;</div>
|
|
<div class="line"><a name="l01798"></a><span class="lineno"> 1798</span> </div>
|
|
<div class="line"><a name="l01799"></a><span class="lineno"> 1799</span>  ArgsortComparator comp = {vals};</div>
|
|
<div class="line"><a name="l01800"></a><span class="lineno"> 1800</span> </div>
|
|
<div class="line"><a name="l01801"></a><span class="lineno"> 1801</span>  SegmentS segs[nt];</div>
|
|
<div class="line"><a name="l01802"></a><span class="lineno"> 1802</span> </div>
|
|
<div class="line"><a name="l01803"></a><span class="lineno"> 1803</span>  <span class="comment">// independent sorts</span></div>
|
|
<div class="line"><a name="l01804"></a><span class="lineno"> 1804</span> <span class="preprocessor">#pragma omp parallel for</span></div>
|
|
<div class="line"><a name="l01805"></a><span class="lineno"> 1805</span> <span class="preprocessor"></span> <span class="keywordflow">for</span> (<span class="keywordtype">int</span> t = 0; t < nt; t++) {</div>
|
|
<div class="line"><a name="l01806"></a><span class="lineno"> 1806</span>  <span class="keywordtype">size_t</span> i0 = t * n / nt;</div>
|
|
<div class="line"><a name="l01807"></a><span class="lineno"> 1807</span>  <span class="keywordtype">size_t</span> i1 = (t + 1) * n / nt;</div>
|
|
<div class="line"><a name="l01808"></a><span class="lineno"> 1808</span>  SegmentS seg = {i0, i1};</div>
|
|
<div class="line"><a name="l01809"></a><span class="lineno"> 1809</span>  std::sort (permA + seg.i0, permA + seg.i1, comp);</div>
|
|
<div class="line"><a name="l01810"></a><span class="lineno"> 1810</span>  segs[t] = seg;</div>
|
|
<div class="line"><a name="l01811"></a><span class="lineno"> 1811</span>  }</div>
|
|
<div class="line"><a name="l01812"></a><span class="lineno"> 1812</span>  <span class="keywordtype">int</span> prev_nested = omp_get_nested();</div>
|
|
<div class="line"><a name="l01813"></a><span class="lineno"> 1813</span>  omp_set_nested(1);</div>
|
|
<div class="line"><a name="l01814"></a><span class="lineno"> 1814</span> </div>
|
|
<div class="line"><a name="l01815"></a><span class="lineno"> 1815</span>  <span class="keywordtype">int</span> nseg = nt;</div>
|
|
<div class="line"><a name="l01816"></a><span class="lineno"> 1816</span>  <span class="keywordflow">while</span> (nseg > 1) {</div>
|
|
<div class="line"><a name="l01817"></a><span class="lineno"> 1817</span>  <span class="keywordtype">int</span> nseg1 = (nseg + 1) / 2;</div>
|
|
<div class="line"><a name="l01818"></a><span class="lineno"> 1818</span>  <span class="keywordtype">int</span> sub_nt = nseg % 2 == 0 ? nt : nt - 1;</div>
|
|
<div class="line"><a name="l01819"></a><span class="lineno"> 1819</span>  <span class="keywordtype">int</span> sub_nseg1 = nseg / 2;</div>
|
|
<div class="line"><a name="l01820"></a><span class="lineno"> 1820</span> </div>
|
|
<div class="line"><a name="l01821"></a><span class="lineno"> 1821</span> <span class="preprocessor">#pragma omp parallel for num_threads(nseg1)</span></div>
|
|
<div class="line"><a name="l01822"></a><span class="lineno"> 1822</span> <span class="preprocessor"></span> <span class="keywordflow">for</span> (<span class="keywordtype">int</span> s = 0; s < nseg; s += 2) {</div>
|
|
<div class="line"><a name="l01823"></a><span class="lineno"> 1823</span>  <span class="keywordflow">if</span> (s + 1 == nseg) { <span class="comment">// otherwise isolated segment</span></div>
|
|
<div class="line"><a name="l01824"></a><span class="lineno"> 1824</span>  memcpy(permB + segs[s].i0, permA + segs[s].i0,</div>
|
|
<div class="line"><a name="l01825"></a><span class="lineno"> 1825</span>  segs[s].len() * <span class="keyword">sizeof</span>(<span class="keywordtype">size_t</span>));</div>
|
|
<div class="line"><a name="l01826"></a><span class="lineno"> 1826</span>  } <span class="keywordflow">else</span> {</div>
|
|
<div class="line"><a name="l01827"></a><span class="lineno"> 1827</span>  <span class="keywordtype">int</span> t0 = s * sub_nt / sub_nseg1;</div>
|
|
<div class="line"><a name="l01828"></a><span class="lineno"> 1828</span>  <span class="keywordtype">int</span> t1 = (s + 1) * sub_nt / sub_nseg1;</div>
|
|
<div class="line"><a name="l01829"></a><span class="lineno"> 1829</span>  printf(<span class="stringliteral">"merge %d %d, %d threads\n"</span>, s, s + 1, t1 - t0);</div>
|
|
<div class="line"><a name="l01830"></a><span class="lineno"> 1830</span>  parallel_merge(permA, permB, segs[s], segs[s + 1],</div>
|
|
<div class="line"><a name="l01831"></a><span class="lineno"> 1831</span>  t1 - t0, comp);</div>
|
|
<div class="line"><a name="l01832"></a><span class="lineno"> 1832</span>  }</div>
|
|
<div class="line"><a name="l01833"></a><span class="lineno"> 1833</span>  }</div>
|
|
<div class="line"><a name="l01834"></a><span class="lineno"> 1834</span>  <span class="keywordflow">for</span> (<span class="keywordtype">int</span> s = 0; s < nseg; s += 2)</div>
|
|
<div class="line"><a name="l01835"></a><span class="lineno"> 1835</span>  segs[s / 2] = segs[s];</div>
|
|
<div class="line"><a name="l01836"></a><span class="lineno"> 1836</span>  nseg = nseg1;</div>
|
|
<div class="line"><a name="l01837"></a><span class="lineno"> 1837</span>  std::swap (permA, permB);</div>
|
|
<div class="line"><a name="l01838"></a><span class="lineno"> 1838</span>  }</div>
|
|
<div class="line"><a name="l01839"></a><span class="lineno"> 1839</span>  assert (permA == perm);</div>
|
|
<div class="line"><a name="l01840"></a><span class="lineno"> 1840</span>  omp_set_nested(prev_nested);</div>
|
|
<div class="line"><a name="l01841"></a><span class="lineno"> 1841</span>  <span class="keyword">delete</span> [] perm2;</div>
|
|
<div class="line"><a name="l01842"></a><span class="lineno"> 1842</span> }</div>
|
|
<div class="line"><a name="l01843"></a><span class="lineno"> 1843</span> </div>
|
|
<div class="line"><a name="l01844"></a><span class="lineno"> 1844</span> </div>
|
|
<div class="line"><a name="l01845"></a><span class="lineno"> 1845</span> </div>
|
|
<div class="line"><a name="l01846"></a><span class="lineno"> 1846</span> </div>
|
|
<div class="line"><a name="l01847"></a><span class="lineno"> 1847</span> </div>
|
|
<div class="line"><a name="l01848"></a><span class="lineno"> 1848</span> </div>
|
|
<div class="line"><a name="l01849"></a><span class="lineno"> 1849</span> </div>
|
|
<div class="line"><a name="l01850"></a><span class="lineno"> 1850</span> </div>
|
|
<div class="line"><a name="l01851"></a><span class="lineno"> 1851</span> </div>
|
|
<div class="line"><a name="l01852"></a><span class="lineno"> 1852</span> </div>
|
|
<div class="line"><a name="l01853"></a><span class="lineno"> 1853</span> </div>
|
|
<div class="line"><a name="l01854"></a><span class="lineno"> 1854</span> </div>
|
|
<div class="line"><a name="l01855"></a><span class="lineno"> 1855</span> </div>
|
|
<div class="line"><a name="l01856"></a><span class="lineno"> 1856</span> </div>
|
|
<div class="line"><a name="l01857"></a><span class="lineno"> 1857</span> </div>
|
|
<div class="line"><a name="l01858"></a><span class="lineno"> 1858</span> </div>
|
|
<div class="line"><a name="l01859"></a><span class="lineno"> 1859</span> <span class="comment">/***************************************************************************</span></div>
|
|
<div class="line"><a name="l01860"></a><span class="lineno"> 1860</span> <span class="comment"> * heavily optimized table computations</span></div>
|
|
<div class="line"><a name="l01861"></a><span class="lineno"> 1861</span> <span class="comment"> ***************************************************************************/</span></div>
|
|
<div class="line"><a name="l01862"></a><span class="lineno"> 1862</span> </div>
|
|
<div class="line"><a name="l01863"></a><span class="lineno"> 1863</span> </div>
|
|
<div class="line"><a name="l01864"></a><span class="lineno"> 1864</span> <span class="keyword">static</span> <span class="keyword">inline</span> <span class="keywordtype">void</span> fvec_madd_ref (<span class="keywordtype">size_t</span> n, <span class="keyword">const</span> <span class="keywordtype">float</span> *a,</div>
|
|
<div class="line"><a name="l01865"></a><span class="lineno"> 1865</span>  <span class="keywordtype">float</span> bf, <span class="keyword">const</span> <span class="keywordtype">float</span> *b, <span class="keywordtype">float</span> *c) {</div>
|
|
<div class="line"><a name="l01866"></a><span class="lineno"> 1866</span>  <span class="keywordflow">for</span> (<span class="keywordtype">size_t</span> i = 0; i < n; i++)</div>
|
|
<div class="line"><a name="l01867"></a><span class="lineno"> 1867</span>  c[i] = a[i] + bf * b[i];</div>
|
|
<div class="line"><a name="l01868"></a><span class="lineno"> 1868</span> }</div>
|
|
<div class="line"><a name="l01869"></a><span class="lineno"> 1869</span> </div>
|
|
<div class="line"><a name="l01870"></a><span class="lineno"> 1870</span> </div>
|
|
<div class="line"><a name="l01871"></a><span class="lineno"> 1871</span> <span class="keyword">static</span> <span class="keyword">inline</span> <span class="keywordtype">void</span> fvec_madd_sse (<span class="keywordtype">size_t</span> n, <span class="keyword">const</span> <span class="keywordtype">float</span> *a,</div>
|
|
<div class="line"><a name="l01872"></a><span class="lineno"> 1872</span>  <span class="keywordtype">float</span> bf, <span class="keyword">const</span> <span class="keywordtype">float</span> *b, <span class="keywordtype">float</span> *c) {</div>
|
|
<div class="line"><a name="l01873"></a><span class="lineno"> 1873</span>  n >>= 2;</div>
|
|
<div class="line"><a name="l01874"></a><span class="lineno"> 1874</span>  __m128 bf4 = _mm_set_ps1 (bf);</div>
|
|
<div class="line"><a name="l01875"></a><span class="lineno"> 1875</span>  __m128 * a4 = (__m128*)a;</div>
|
|
<div class="line"><a name="l01876"></a><span class="lineno"> 1876</span>  __m128 * b4 = (__m128*)b;</div>
|
|
<div class="line"><a name="l01877"></a><span class="lineno"> 1877</span>  __m128 * c4 = (__m128*)c;</div>
|
|
<div class="line"><a name="l01878"></a><span class="lineno"> 1878</span> </div>
|
|
<div class="line"><a name="l01879"></a><span class="lineno"> 1879</span>  <span class="keywordflow">while</span> (n--) {</div>
|
|
<div class="line"><a name="l01880"></a><span class="lineno"> 1880</span>  *c4 = _mm_add_ps (*a4, _mm_mul_ps (bf4, *b4));</div>
|
|
<div class="line"><a name="l01881"></a><span class="lineno"> 1881</span>  b4++;</div>
|
|
<div class="line"><a name="l01882"></a><span class="lineno"> 1882</span>  a4++;</div>
|
|
<div class="line"><a name="l01883"></a><span class="lineno"> 1883</span>  c4++;</div>
|
|
<div class="line"><a name="l01884"></a><span class="lineno"> 1884</span>  }</div>
|
|
<div class="line"><a name="l01885"></a><span class="lineno"> 1885</span> }</div>
|
|
<div class="line"><a name="l01886"></a><span class="lineno"> 1886</span> </div>
|
|
<div class="line"><a name="l01887"></a><span class="lineno"><a class="line" href="namespacefaiss.html#a40328c31abd0bbba5bd95d7de951e847"> 1887</a></span> <span class="keywordtype">void</span> <a class="code" href="namespacefaiss.html#a40328c31abd0bbba5bd95d7de951e847">fvec_madd</a> (<span class="keywordtype">size_t</span> n, <span class="keyword">const</span> <span class="keywordtype">float</span> *a,</div>
|
|
<div class="line"><a name="l01888"></a><span class="lineno"> 1888</span>  <span class="keywordtype">float</span> bf, <span class="keyword">const</span> <span class="keywordtype">float</span> *b, <span class="keywordtype">float</span> *c)</div>
|
|
<div class="line"><a name="l01889"></a><span class="lineno"> 1889</span> {</div>
|
|
<div class="line"><a name="l01890"></a><span class="lineno"> 1890</span>  <span class="keywordflow">if</span> ((n & 3) == 0 &&</div>
|
|
<div class="line"><a name="l01891"></a><span class="lineno"> 1891</span>  ((((<span class="keywordtype">long</span>)a) | ((<span class="keywordtype">long</span>)b) | ((<span class="keywordtype">long</span>)c)) & 15) == 0)</div>
|
|
<div class="line"><a name="l01892"></a><span class="lineno"> 1892</span>  fvec_madd_sse (n, a, bf, b, c);</div>
|
|
<div class="line"><a name="l01893"></a><span class="lineno"> 1893</span>  <span class="keywordflow">else</span></div>
|
|
<div class="line"><a name="l01894"></a><span class="lineno"> 1894</span>  fvec_madd_ref (n, a, bf, b, c);</div>
|
|
<div class="line"><a name="l01895"></a><span class="lineno"> 1895</span> }</div>
|
|
<div class="line"><a name="l01896"></a><span class="lineno"> 1896</span> </div>
|
|
<div class="line"><a name="l01897"></a><span class="lineno"> 1897</span> <span class="keyword">static</span> <span class="keyword">inline</span> <span class="keywordtype">int</span> fvec_madd_and_argmin_ref (<span class="keywordtype">size_t</span> n, <span class="keyword">const</span> <span class="keywordtype">float</span> *a,</div>
|
|
<div class="line"><a name="l01898"></a><span class="lineno"> 1898</span>  <span class="keywordtype">float</span> bf, <span class="keyword">const</span> <span class="keywordtype">float</span> *b, <span class="keywordtype">float</span> *c) {</div>
|
|
<div class="line"><a name="l01899"></a><span class="lineno"> 1899</span>  <span class="keywordtype">float</span> vmin = 1e20;</div>
|
|
<div class="line"><a name="l01900"></a><span class="lineno"> 1900</span>  <span class="keywordtype">int</span> imin = -1;</div>
|
|
<div class="line"><a name="l01901"></a><span class="lineno"> 1901</span> </div>
|
|
<div class="line"><a name="l01902"></a><span class="lineno"> 1902</span>  <span class="keywordflow">for</span> (<span class="keywordtype">size_t</span> i = 0; i < n; i++) {</div>
|
|
<div class="line"><a name="l01903"></a><span class="lineno"> 1903</span>  c[i] = a[i] + bf * b[i];</div>
|
|
<div class="line"><a name="l01904"></a><span class="lineno"> 1904</span>  <span class="keywordflow">if</span> (c[i] < vmin) {</div>
|
|
<div class="line"><a name="l01905"></a><span class="lineno"> 1905</span>  vmin = c[i];</div>
|
|
<div class="line"><a name="l01906"></a><span class="lineno"> 1906</span>  imin = i;</div>
|
|
<div class="line"><a name="l01907"></a><span class="lineno"> 1907</span>  }</div>
|
|
<div class="line"><a name="l01908"></a><span class="lineno"> 1908</span>  }</div>
|
|
<div class="line"><a name="l01909"></a><span class="lineno"> 1909</span>  <span class="keywordflow">return</span> imin;</div>
|
|
<div class="line"><a name="l01910"></a><span class="lineno"> 1910</span> }</div>
|
|
<div class="line"><a name="l01911"></a><span class="lineno"> 1911</span> </div>
|
|
<div class="line"><a name="l01912"></a><span class="lineno"> 1912</span> <span class="keyword">static</span> <span class="keyword">inline</span> <span class="keywordtype">int</span> fvec_madd_and_argmin_sse (<span class="keywordtype">size_t</span> n, <span class="keyword">const</span> <span class="keywordtype">float</span> *a,</div>
|
|
<div class="line"><a name="l01913"></a><span class="lineno"> 1913</span>  <span class="keywordtype">float</span> bf, <span class="keyword">const</span> <span class="keywordtype">float</span> *b, <span class="keywordtype">float</span> *c) {</div>
|
|
<div class="line"><a name="l01914"></a><span class="lineno"> 1914</span>  n >>= 2;</div>
|
|
<div class="line"><a name="l01915"></a><span class="lineno"> 1915</span>  __m128 bf4 = _mm_set_ps1 (bf);</div>
|
|
<div class="line"><a name="l01916"></a><span class="lineno"> 1916</span>  __m128 vmin4 = _mm_set_ps1 (1e20);</div>
|
|
<div class="line"><a name="l01917"></a><span class="lineno"> 1917</span>  __m128i imin4 = _mm_set1_epi32 (-1);</div>
|
|
<div class="line"><a name="l01918"></a><span class="lineno"> 1918</span>  __m128i idx4 = _mm_set_epi32 (3, 2, 1, 0);</div>
|
|
<div class="line"><a name="l01919"></a><span class="lineno"> 1919</span>  __m128i inc4 = _mm_set1_epi32 (4);</div>
|
|
<div class="line"><a name="l01920"></a><span class="lineno"> 1920</span>  __m128 * a4 = (__m128*)a;</div>
|
|
<div class="line"><a name="l01921"></a><span class="lineno"> 1921</span>  __m128 * b4 = (__m128*)b;</div>
|
|
<div class="line"><a name="l01922"></a><span class="lineno"> 1922</span>  __m128 * c4 = (__m128*)c;</div>
|
|
<div class="line"><a name="l01923"></a><span class="lineno"> 1923</span> </div>
|
|
<div class="line"><a name="l01924"></a><span class="lineno"> 1924</span>  <span class="keywordflow">while</span> (n--) {</div>
|
|
<div class="line"><a name="l01925"></a><span class="lineno"> 1925</span>  __m128 vc4 = _mm_add_ps (*a4, _mm_mul_ps (bf4, *b4));</div>
|
|
<div class="line"><a name="l01926"></a><span class="lineno"> 1926</span>  *c4 = vc4;</div>
|
|
<div class="line"><a name="l01927"></a><span class="lineno"> 1927</span>  __m128i mask = (__m128i)_mm_cmpgt_ps (vmin4, vc4);</div>
|
|
<div class="line"><a name="l01928"></a><span class="lineno"> 1928</span>  <span class="comment">// imin4 = _mm_blendv_epi8 (imin4, idx4, mask); // slower!</span></div>
|
|
<div class="line"><a name="l01929"></a><span class="lineno"> 1929</span> </div>
|
|
<div class="line"><a name="l01930"></a><span class="lineno"> 1930</span>  imin4 = _mm_or_si128 (_mm_and_si128 (mask, idx4),</div>
|
|
<div class="line"><a name="l01931"></a><span class="lineno"> 1931</span>  _mm_andnot_si128 (mask, imin4));</div>
|
|
<div class="line"><a name="l01932"></a><span class="lineno"> 1932</span>  vmin4 = _mm_min_ps (vmin4, vc4);</div>
|
|
<div class="line"><a name="l01933"></a><span class="lineno"> 1933</span>  b4++;</div>
|
|
<div class="line"><a name="l01934"></a><span class="lineno"> 1934</span>  a4++;</div>
|
|
<div class="line"><a name="l01935"></a><span class="lineno"> 1935</span>  c4++;</div>
|
|
<div class="line"><a name="l01936"></a><span class="lineno"> 1936</span>  idx4 = _mm_add_epi32 (idx4, inc4);</div>
|
|
<div class="line"><a name="l01937"></a><span class="lineno"> 1937</span>  }</div>
|
|
<div class="line"><a name="l01938"></a><span class="lineno"> 1938</span> </div>
|
|
<div class="line"><a name="l01939"></a><span class="lineno"> 1939</span>  <span class="comment">// 4 values -> 2</span></div>
|
|
<div class="line"><a name="l01940"></a><span class="lineno"> 1940</span>  {</div>
|
|
<div class="line"><a name="l01941"></a><span class="lineno"> 1941</span>  idx4 = _mm_shuffle_epi32 (imin4, 3 << 2 | 2);</div>
|
|
<div class="line"><a name="l01942"></a><span class="lineno"> 1942</span>  __m128 vc4 = _mm_shuffle_ps (vmin4, vmin4, 3 << 2 | 2);</div>
|
|
<div class="line"><a name="l01943"></a><span class="lineno"> 1943</span>  __m128i mask = (__m128i)_mm_cmpgt_ps (vmin4, vc4);</div>
|
|
<div class="line"><a name="l01944"></a><span class="lineno"> 1944</span>  imin4 = _mm_or_si128 (_mm_and_si128 (mask, idx4),</div>
|
|
<div class="line"><a name="l01945"></a><span class="lineno"> 1945</span>  _mm_andnot_si128 (mask, imin4));</div>
|
|
<div class="line"><a name="l01946"></a><span class="lineno"> 1946</span>  vmin4 = _mm_min_ps (vmin4, vc4);</div>
|
|
<div class="line"><a name="l01947"></a><span class="lineno"> 1947</span>  }</div>
|
|
<div class="line"><a name="l01948"></a><span class="lineno"> 1948</span>  <span class="comment">// 2 values -> 1</span></div>
|
|
<div class="line"><a name="l01949"></a><span class="lineno"> 1949</span>  {</div>
|
|
<div class="line"><a name="l01950"></a><span class="lineno"> 1950</span>  idx4 = _mm_shuffle_epi32 (imin4, 1);</div>
|
|
<div class="line"><a name="l01951"></a><span class="lineno"> 1951</span>  __m128 vc4 = _mm_shuffle_ps (vmin4, vmin4, 1);</div>
|
|
<div class="line"><a name="l01952"></a><span class="lineno"> 1952</span>  __m128i mask = (__m128i)_mm_cmpgt_ps (vmin4, vc4);</div>
|
|
<div class="line"><a name="l01953"></a><span class="lineno"> 1953</span>  imin4 = _mm_or_si128 (_mm_and_si128 (mask, idx4),</div>
|
|
<div class="line"><a name="l01954"></a><span class="lineno"> 1954</span>  _mm_andnot_si128 (mask, imin4));</div>
|
|
<div class="line"><a name="l01955"></a><span class="lineno"> 1955</span>  <span class="comment">// vmin4 = _mm_min_ps (vmin4, vc4);</span></div>
|
|
<div class="line"><a name="l01956"></a><span class="lineno"> 1956</span>  }</div>
|
|
<div class="line"><a name="l01957"></a><span class="lineno"> 1957</span>  <span class="keywordflow">return</span> _mm_extract_epi32 (imin4, 0);</div>
|
|
<div class="line"><a name="l01958"></a><span class="lineno"> 1958</span> }</div>
|
|
<div class="line"><a name="l01959"></a><span class="lineno"> 1959</span> </div>
|
|
<div class="line"><a name="l01960"></a><span class="lineno"> 1960</span> </div>
|
|
<div class="line"><a name="l01961"></a><span class="lineno"><a class="line" href="namespacefaiss.html#a9da63b8bb84460f5e8ccf8e17622cc7a"> 1961</a></span> <span class="keywordtype">int</span> <a class="code" href="namespacefaiss.html#a9da63b8bb84460f5e8ccf8e17622cc7a">fvec_madd_and_argmin</a> (<span class="keywordtype">size_t</span> n, <span class="keyword">const</span> <span class="keywordtype">float</span> *a,</div>
|
|
<div class="line"><a name="l01962"></a><span class="lineno"> 1962</span>  <span class="keywordtype">float</span> bf, <span class="keyword">const</span> <span class="keywordtype">float</span> *b, <span class="keywordtype">float</span> *c)</div>
|
|
<div class="line"><a name="l01963"></a><span class="lineno"> 1963</span> {</div>
|
|
<div class="line"><a name="l01964"></a><span class="lineno"> 1964</span>  <span class="keywordflow">if</span> ((n & 3) == 0 &&</div>
|
|
<div class="line"><a name="l01965"></a><span class="lineno"> 1965</span>  ((((<span class="keywordtype">long</span>)a) | ((<span class="keywordtype">long</span>)b) | ((<span class="keywordtype">long</span>)c)) & 15) == 0)</div>
|
|
<div class="line"><a name="l01966"></a><span class="lineno"> 1966</span>  <span class="keywordflow">return</span> fvec_madd_and_argmin_sse (n, a, bf, b, c);</div>
|
|
<div class="line"><a name="l01967"></a><span class="lineno"> 1967</span>  <span class="keywordflow">else</span></div>
|
|
<div class="line"><a name="l01968"></a><span class="lineno"> 1968</span>  <span class="keywordflow">return</span> fvec_madd_and_argmin_ref (n, a, bf, b, c);</div>
|
|
<div class="line"><a name="l01969"></a><span class="lineno"> 1969</span> }</div>
|
|
<div class="line"><a name="l01970"></a><span class="lineno"> 1970</span> </div>
|
|
<div class="line"><a name="l01971"></a><span class="lineno"> 1971</span> </div>
|
|
<div class="line"><a name="l01972"></a><span class="lineno"> 1972</span> </div>
|
|
<div class="line"><a name="l01973"></a><span class="lineno"><a class="line" href="namespacefaiss.html#a14884d253128c7af5891a65082ad7dc6"> 1973</a></span> <span class="keyword">const</span> <span class="keywordtype">float</span> *<a class="code" href="namespacefaiss.html#a14884d253128c7af5891a65082ad7dc6">fvecs_maybe_subsample</a> (</div>
|
|
<div class="line"><a name="l01974"></a><span class="lineno"> 1974</span>  <span class="keywordtype">size_t</span> d, <span class="keywordtype">size_t</span> *n, <span class="keywordtype">size_t</span> nmax, <span class="keyword">const</span> <span class="keywordtype">float</span> *x,</div>
|
|
<div class="line"><a name="l01975"></a><span class="lineno"> 1975</span>  <span class="keywordtype">bool</span> verbose, <span class="keywordtype">long</span> seed)</div>
|
|
<div class="line"><a name="l01976"></a><span class="lineno"> 1976</span> {</div>
|
|
<div class="line"><a name="l01977"></a><span class="lineno"> 1977</span> </div>
|
|
<div class="line"><a name="l01978"></a><span class="lineno"> 1978</span>  <span class="keywordflow">if</span> (*n <= nmax) <span class="keywordflow">return</span> x; <span class="comment">// nothing to do</span></div>
|
|
<div class="line"><a name="l01979"></a><span class="lineno"> 1979</span> </div>
|
|
<div class="line"><a name="l01980"></a><span class="lineno"> 1980</span>  <span class="keywordtype">size_t</span> n2 = nmax;</div>
|
|
<div class="line"><a name="l01981"></a><span class="lineno"> 1981</span>  <span class="keywordflow">if</span> (verbose) {</div>
|
|
<div class="line"><a name="l01982"></a><span class="lineno"> 1982</span>  printf (<span class="stringliteral">" Input training set too big (max size is %ld), sampling "</span></div>
|
|
<div class="line"><a name="l01983"></a><span class="lineno"> 1983</span>  <span class="stringliteral">"%ld / %ld vectors\n"</span>, nmax, n2, *n);</div>
|
|
<div class="line"><a name="l01984"></a><span class="lineno"> 1984</span>  }</div>
|
|
<div class="line"><a name="l01985"></a><span class="lineno"> 1985</span>  std::vector<int> subset (*n);</div>
|
|
<div class="line"><a name="l01986"></a><span class="lineno"> 1986</span>  rand_perm (subset.data (), *n, seed);</div>
|
|
<div class="line"><a name="l01987"></a><span class="lineno"> 1987</span>  <span class="keywordtype">float</span> *x_subset = <span class="keyword">new</span> <span class="keywordtype">float</span>[n2 * d];</div>
|
|
<div class="line"><a name="l01988"></a><span class="lineno"> 1988</span>  <span class="keywordflow">for</span> (<span class="keywordtype">long</span> i = 0; i < n2; i++)</div>
|
|
<div class="line"><a name="l01989"></a><span class="lineno"> 1989</span>  memcpy (&x_subset[i * d],</div>
|
|
<div class="line"><a name="l01990"></a><span class="lineno"> 1990</span>  &x[subset[i] * <span class="keywordtype">size_t</span>(d)],</div>
|
|
<div class="line"><a name="l01991"></a><span class="lineno"> 1991</span>  <span class="keyword">sizeof</span> (x[0]) * d);</div>
|
|
<div class="line"><a name="l01992"></a><span class="lineno"> 1992</span>  *n = n2;</div>
|
|
<div class="line"><a name="l01993"></a><span class="lineno"> 1993</span>  <span class="keywordflow">return</span> x_subset;</div>
|
|
<div class="line"><a name="l01994"></a><span class="lineno"> 1994</span> }</div>
|
|
<div class="line"><a name="l01995"></a><span class="lineno"> 1995</span> </div>
|
|
<div class="line"><a name="l01996"></a><span class="lineno"> 1996</span> </div>
|
|
<div class="line"><a name="l01997"></a><span class="lineno"> 1997</span> } <span class="comment">// namespace faiss</span></div>
|
|
<div class="ttc" id="structfaiss_1_1RandomGenerator_html"><div class="ttname"><a href="structfaiss_1_1RandomGenerator.html">faiss::RandomGenerator</a></div><div class="ttdoc">random generator that can be used in multithreaded contexts </div><div class="ttdef"><b>Definition:</b> <a href="utils_8h_source.html#l00048">utils.h:48</a></div></div>
|
|
<div class="ttc" id="namespacefaiss_html_aa2c6a9e87a64bba8e8014e14f70bde21"><div class="ttname"><a href="namespacefaiss.html#aa2c6a9e87a64bba8e8014e14f70bde21">faiss::km_update_centroids</a></div><div class="ttdeci">int km_update_centroids(const float *x, float *centroids, long *assign, size_t d, size_t k, size_t n, size_t k_frozen)</div><div class="ttdef"><b>Definition:</b> <a href="utils_8cpp_source.html#l01399">utils.cpp:1399</a></div></div>
|
|
<div class="ttc" id="namespacefaiss_html_a5eb1701e46123827966f2a56da893d1d"><div class="ttname"><a href="namespacefaiss.html#a5eb1701e46123827966f2a56da893d1d">faiss::knn_L2sqr_base_shift</a></div><div class="ttdeci">void knn_L2sqr_base_shift(const float *x, const float *y, size_t d, size_t nx, size_t ny, float_maxheap_array_t *res, const float *base_shift)</div><div class="ttdef"><b>Definition:</b> <a href="utils_8cpp_source.html#l00986">utils.cpp:986</a></div></div>
|
|
<div class="ttc" id="structfaiss_1_1RandomGenerator_html_a7633c373153f3b2824d2d99382ba20ab"><div class="ttname"><a href="structfaiss_1_1RandomGenerator.html#a7633c373153f3b2824d2d99382ba20ab">faiss::RandomGenerator::RandomGenerator</a></div><div class="ttdeci">RandomGenerator(long seed=1234)</div><div class="ttdoc">initialize </div></div>
|
|
<div class="ttc" id="namespacefaiss_html_a7466bd32de31640860393a701eaac5ad"><div class="ttname"><a href="namespacefaiss.html#a7466bd32de31640860393a701eaac5ad">faiss::fvec_L2sqr</a></div><div class="ttdeci">float fvec_L2sqr(const float *x, const float *y, size_t d)</div><div class="ttdoc">Squared L2 distance between two vectors. </div><div class="ttdef"><b>Definition:</b> <a href="utils_8cpp_source.html#l00574">utils.cpp:574</a></div></div>
|
|
<div class="ttc" id="namespacefaiss_html_a154a47857ed321b9db91122770a16e09"><div class="ttname"><a href="namespacefaiss.html#a154a47857ed321b9db91122770a16e09">faiss::bincode_hist</a></div><div class="ttdeci">void bincode_hist(size_t n, size_t nbits, const uint8_t *codes, int *hist)</div><div class="ttdef"><b>Definition:</b> <a href="utils_8cpp_source.html#l01647">utils.cpp:1647</a></div></div>
|
|
<div class="ttc" id="namespacefaiss_html_a14884d253128c7af5891a65082ad7dc6"><div class="ttname"><a href="namespacefaiss.html#a14884d253128c7af5891a65082ad7dc6">faiss::fvecs_maybe_subsample</a></div><div class="ttdeci">const float * fvecs_maybe_subsample(size_t d, size_t *n, size_t nmax, const float *x, bool verbose, long seed)</div><div class="ttdef"><b>Definition:</b> <a href="utils_8cpp_source.html#l01973">utils.cpp:1973</a></div></div>
|
|
<div class="ttc" id="namespacefaiss_html_ae0ee1b6fbd3d6da0f1a3550a780ca24c"><div class="ttname"><a href="namespacefaiss.html#ae0ee1b6fbd3d6da0f1a3550a780ca24c">faiss::ranklist_handle_ties</a></div><div class="ttdeci">void ranklist_handle_ties(int k, long *idx, const float *dis)</div><div class="ttdef"><b>Definition:</b> <a href="utils_8cpp_source.html#l01493">utils.cpp:1493</a></div></div>
|
|
<div class="ttc" id="structfaiss_1_1RandomGenerator_html_ac82a433d7bfa56d750907ba5cf74aed7"><div class="ttname"><a href="structfaiss_1_1RandomGenerator.html#ac82a433d7bfa56d750907ba5cf74aed7">faiss::RandomGenerator::rand_float</a></div><div class="ttdeci">float rand_float()</div><div class="ttdoc">between 0 and 1 </div><div class="ttdef"><b>Definition:</b> <a href="utils_8cpp_source.html#l00211">utils.cpp:211</a></div></div>
|
|
<div class="ttc" id="namespacefaiss_html_a40328c31abd0bbba5bd95d7de951e847"><div class="ttname"><a href="namespacefaiss.html#a40328c31abd0bbba5bd95d7de951e847">faiss::fvec_madd</a></div><div class="ttdeci">void fvec_madd(size_t n, const float *a, float bf, const float *b, float *c)</div><div class="ttdef"><b>Definition:</b> <a href="utils_8cpp_source.html#l01887">utils.cpp:1887</a></div></div>
|
|
<div class="ttc" id="namespacefaiss_html_aa3af5769b0b649f112332a874c64d361"><div class="ttname"><a href="namespacefaiss.html#aa3af5769b0b649f112332a874c64d361">faiss::get_mem_usage_kb</a></div><div class="ttdeci">size_t get_mem_usage_kb()</div><div class="ttdoc">get current RSS usage in kB </div></div>
|
|
<div class="ttc" id="namespacefaiss_html_a7bea462108bee98d8d5859b51ec4db8e"><div class="ttname"><a href="namespacefaiss.html#a7bea462108bee98d8d5859b51ec4db8e">faiss::ivec_hist</a></div><div class="ttdeci">int ivec_hist(size_t n, const int *v, int vmax, int *hist)</div><div class="ttdoc">compute histogram on v </div><div class="ttdef"><b>Definition:</b> <a href="utils_8cpp_source.html#l01636">utils.cpp:1636</a></div></div>
|
|
<div class="ttc" id="structfaiss_1_1RandomGenerator_html_acbceaa8b017793ca4f8d90e644b0d7f4"><div class="ttname"><a href="structfaiss_1_1RandomGenerator.html#acbceaa8b017793ca4f8d90e644b0d7f4">faiss::RandomGenerator::rand_long</a></div><div class="ttdeci">long rand_long()</div><div class="ttdoc">random long < 2 ^ 62 </div></div>
|
|
<div class="ttc" id="namespacefaiss_html_afb7b33f6892678ba79aaf5e71777837c"><div class="ttname"><a href="namespacefaiss.html#afb7b33f6892678ba79aaf5e71777837c">faiss::merge_result_table_with</a></div><div class="ttdeci">size_t merge_result_table_with(size_t n, size_t k, long *I0, float *D0, const long *I1, const float *D1, bool keep_min, long translation)</div><div class="ttdef"><b>Definition:</b> <a href="utils_8cpp_source.html#l01509">utils.cpp:1509</a></div></div>
|
|
<div class="ttc" id="structfaiss_1_1RandomGenerator_html_a583f124ecacdbe037ac96e23a44dd420"><div class="ttname"><a href="structfaiss_1_1RandomGenerator.html#a583f124ecacdbe037ac96e23a44dd420">faiss::RandomGenerator::rand_int</a></div><div class="ttdeci">int rand_int()</div><div class="ttdoc">random 31-bit positive integer </div></div>
|
|
<div class="ttc" id="structfaiss_1_1HeapArray_html"><div class="ttname"><a href="structfaiss_1_1HeapArray.html">faiss::HeapArray</a></div><div class="ttdef"><b>Definition:</b> <a href="Heap_8h_source.html#l00350">Heap.h:350</a></div></div>
|
|
<div class="ttc" id="namespacefaiss_html_a8dbc652ba48d41f126b8815004899448"><div class="ttname"><a href="namespacefaiss.html#a8dbc652ba48d41f126b8815004899448">faiss::ranklist_intersection_size</a></div><div class="ttdeci">size_t ranklist_intersection_size(size_t k1, const long *v1, size_t k2, const long *v2_in)</div><div class="ttdef"><b>Definition:</b> <a href="utils_8cpp_source.html#l01574">utils.cpp:1574</a></div></div>
|
|
<div class="ttc" id="namespacefaiss_html_a3d9c7db82d43c1f0ab1d28b92bc9fe57"><div class="ttname"><a href="namespacefaiss.html#a3d9c7db82d43c1f0ab1d28b92bc9fe57">faiss::pairwise_L2sqr</a></div><div class="ttdeci">void pairwise_L2sqr(long d, long nq, const float *xq, long nb, const float *xb, float *dis, long ldq, long ldb, long ldd)</div><div class="ttdef"><b>Definition:</b> <a href="utils_8cpp_source.html#l01342">utils.cpp:1342</a></div></div>
|
|
<div class="ttc" id="namespacefaiss_html_ab29d725b808df6f142b80f21aa45e507"><div class="ttname"><a href="namespacefaiss.html#ab29d725b808df6f142b80f21aa45e507">faiss::range_search_inner_product</a></div><div class="ttdeci">void range_search_inner_product(const float *x, const float *y, size_t d, size_t nx, size_t ny, float radius, RangeSearchResult *res)</div><div class="ttdoc">same as range_search_L2sqr for the inner product similarity </div><div class="ttdef"><b>Definition:</b> <a href="utils_8cpp_source.html#l01280">utils.cpp:1280</a></div></div>
|
|
<div class="ttc" id="namespacefaiss_html_a880c7318971f866267a86945aaa61b17"><div class="ttname"><a href="namespacefaiss.html#a880c7318971f866267a86945aaa61b17">faiss::knn_inner_product</a></div><div class="ttdeci">void knn_inner_product(const float *x, const float *y, size_t d, size_t nx, size_t ny, float_minheap_array_t *res)</div><div class="ttdef"><b>Definition:</b> <a href="utils_8cpp_source.html#l00946">utils.cpp:946</a></div></div>
|
|
<div class="ttc" id="namespacefaiss_html_af2a71f7d5402ae02ce169a4cc83020eb"><div class="ttname"><a href="namespacefaiss.html#af2a71f7d5402ae02ce169a4cc83020eb">faiss::getmillisecs</a></div><div class="ttdeci">double getmillisecs()</div><div class="ttdoc">ms elapsed since some arbitrary epoch </div><div class="ttdef"><b>Definition:</b> <a href="utils_8cpp_source.html#l00074">utils.cpp:74</a></div></div>
|
|
<div class="ttc" id="structfaiss_1_1NopDistanceCorrection_html"><div class="ttname"><a href="structfaiss_1_1NopDistanceCorrection.html">faiss::NopDistanceCorrection</a></div><div class="ttdef"><b>Definition:</b> <a href="utils_8cpp_source.html#l00960">utils.cpp:960</a></div></div>
|
|
<div class="ttc" id="namespacefaiss_html_a94c1d99ac39d22e362aa27ce7c7ff714"><div class="ttname"><a href="namespacefaiss.html#a94c1d99ac39d22e362aa27ce7c7ff714">faiss::imbalance_factor</a></div><div class="ttdeci">double imbalance_factor(int k, const int *hist)</div><div class="ttdoc">same, takes a histogram as input </div><div class="ttdef"><b>Definition:</b> <a href="utils_8cpp_source.html#l01612">utils.cpp:1612</a></div></div>
|
|
<div class="ttc" id="namespacefaiss_html_a7a49180ebf10e643217bbce5862c7f84"><div class="ttname"><a href="namespacefaiss.html#a7a49180ebf10e643217bbce5862c7f84">faiss::fvec_norm_L2sqr</a></div><div class="ttdeci">float fvec_norm_L2sqr(const float *x, size_t d)</div><div class="ttdef"><b>Definition:</b> <a href="utils_8cpp_source.html#l00632">utils.cpp:632</a></div></div>
|
|
<div class="ttc" id="namespacefaiss_html_a1faa7bd079c9b1addf3058ddf882a000"><div class="ttname"><a href="namespacefaiss.html#a1faa7bd079c9b1addf3058ddf882a000">faiss::range_search_L2sqr</a></div><div class="ttdeci">void range_search_L2sqr(const float *x, const float *y, size_t d, size_t nx, size_t ny, float radius, RangeSearchResult *res)</div><div class="ttdef"><b>Definition:</b> <a href="utils_8cpp_source.html#l01265">utils.cpp:1265</a></div></div>
|
|
<div class="ttc" id="namespacefaiss_html_afb68fe89ad5e948974da1b70d7b4157c"><div class="ttname"><a href="namespacefaiss.html#afb68fe89ad5e948974da1b70d7b4157c">faiss::matrix_qr</a></div><div class="ttdeci">void matrix_qr(int m, int n, float *a)</div><div class="ttdef"><b>Definition:</b> <a href="utils_8cpp_source.html#l01320">utils.cpp:1320</a></div></div>
|
|
<div class="ttc" id="structfaiss_1_1RangeSearchResult_html"><div class="ttname"><a href="structfaiss_1_1RangeSearchResult.html">faiss::RangeSearchResult</a></div><div class="ttdef"><b>Definition:</b> <a href="AuxIndexStructures_8h_source.html#l00029">AuxIndexStructures.h:29</a></div></div>
|
|
<div class="ttc" id="namespacefaiss_html_a4369329c9dbdfe23e3f35d09ba7b5c6e"><div class="ttname"><a href="namespacefaiss.html#a4369329c9dbdfe23e3f35d09ba7b5c6e">faiss::ivec_checksum</a></div><div class="ttdeci">size_t ivec_checksum(size_t n, const int *a)</div><div class="ttdoc">compute a checksum on a table. </div><div class="ttdef"><b>Definition:</b> <a href="utils_8cpp_source.html#l01670">utils.cpp:1670</a></div></div>
|
|
<div class="ttc" id="structfaiss_1_1BaseShiftDistanceCorrection_html"><div class="ttname"><a href="structfaiss_1_1BaseShiftDistanceCorrection.html">faiss::BaseShiftDistanceCorrection</a></div><div class="ttdef"><b>Definition:</b> <a href="utils_8cpp_source.html#l00979">utils.cpp:979</a></div></div>
|
|
<div class="ttc" id="namespacefaiss_html_a9da63b8bb84460f5e8ccf8e17622cc7a"><div class="ttname"><a href="namespacefaiss.html#a9da63b8bb84460f5e8ccf8e17622cc7a">faiss::fvec_madd_and_argmin</a></div><div class="ttdeci">int fvec_madd_and_argmin(size_t n, const float *a, float bf, const float *b, float *c)</div><div class="ttdef"><b>Definition:</b> <a href="utils_8cpp_source.html#l01961">utils.cpp:1961</a></div></div>
|
|
<div class="ttc" id="namespacefaiss_html_a2f803e3d3b07cfab63699c89de161237"><div class="ttname"><a href="namespacefaiss.html#a2f803e3d3b07cfab63699c89de161237">faiss::knn_L2sqr</a></div><div class="ttdeci">void knn_L2sqr(const float *x, const float *y, size_t d, size_t nx, size_t ny, float_maxheap_array_t *res)</div><div class="ttdef"><b>Definition:</b> <a href="utils_8cpp_source.html#l00966">utils.cpp:966</a></div></div>
|
|
</div><!-- fragment --></div><!-- contents -->
|
|
<!-- start footer part -->
|
|
<hr class="footer"/><address class="footer"><small>
|
|
Generated by  <a href="http://www.doxygen.org/index.html">
|
|
<img class="footer" src="doxygen.png" alt="doxygen"/>
|
|
</a> 1.8.5
|
|
</small></address>
|
|
</body>
|
|
</html>
|