<divclass="line"><aname="l00006"></a><spanclass="lineno"> 6</span> <spanclass="comment"> * This source code is licensed under the CC-by-NC license found in the</span></div>
<divclass="line"><aname="l00007"></a><spanclass="lineno"> 7</span> <spanclass="comment"> * LICENSE file in the root directory of this source tree.</span></div>
<divclass="line"><aname="l00010"></a><spanclass="lineno"> 10</span> <spanclass="comment">// Copyright 2004-present Facebook. All Rights Reserved</span></div>
<divclass="line"><aname="l00043"></a><spanclass="lineno"> 43</span> <spanclass="comment">// make a set of nt training vectors in the unit cube</span></div>
<divclass="line"><aname="l00044"></a><spanclass="lineno"> 44</span> <spanclass="comment">// (could be the database)</span></div>
<divclass="line"><aname="l00045"></a><spanclass="lineno"> 45</span> <spanclass="keywordtype">size_t</span> nt = 100 * 1000;</div>
<divclass="line"><aname="l00048"></a><spanclass="lineno"> 48</span> <spanclass="comment">// Define the core quantizer</span></div>
<divclass="line"><aname="l00049"></a><spanclass="lineno"> 49</span> <spanclass="comment">// We choose a multiple inverted index for faster training with less data</span></div>
<divclass="line"><aname="l00050"></a><spanclass="lineno"> 50</span> <spanclass="comment">// and because it usually offers best accuracy/speed trade-offs</span></div>
<divclass="line"><aname="l00052"></a><spanclass="lineno"> 52</span> <spanclass="comment">// We assume here that the lifespan of this coarse quantizer will cover the</span></div>
<divclass="line"><aname="l00053"></a><spanclass="lineno"> 53</span> <spanclass="comment">// lifespan of the inverted-file quantizer IndexIVFFlat below</span></div>
<divclass="line"><aname="l00054"></a><spanclass="lineno"> 54</span> <spanclass="comment">// With dynamic allocation, one may give the responsibility to free the</span></div>
<divclass="line"><aname="l00055"></a><spanclass="lineno"> 55</span> <spanclass="comment">// quantizer to the inverted-file index (with attribute do_delete_quantizer)</span></div>
<divclass="line"><aname="l00057"></a><spanclass="lineno"> 57</span> <spanclass="comment">// Note: a regular clustering algorithm would be defined as:</span></div>
<divclass="line"><aname="l00060"></a><spanclass="lineno"> 60</span> <spanclass="comment">// Use nhash=2 subquantizers to define the product coarse quantizer</span></div>
<divclass="line"><aname="l00061"></a><spanclass="lineno"> 61</span> <spanclass="comment">// Number of bits: we will have 2^nbits_subq centroids per subquantizer</span></div>
<divclass="line"><aname="l00062"></a><spanclass="lineno"> 62</span> <spanclass="comment">// meaning (2^nbits_subq)^nhash distinct inverted lists</span></div>
<divclass="line"><aname="l00064"></a><spanclass="lineno"> 64</span> <spanclass="keywordtype">size_t</span> nbits_subq = int (log2 (nb+1) / 2); <spanclass="comment">// good choice in general</span></div>
<divclass="line"><aname="l00065"></a><spanclass="lineno"> 65</span> <spanclass="keywordtype">size_t</span> ncentroids = 1 << (nhash * nbits_subq); <spanclass="comment">// total # of centroids</span></div>
<divclass="line"><aname="l00072"></a><spanclass="lineno"> 72</span> <spanclass="comment">// the coarse quantizer should not be deallocated before the index</span></div>
<divclass="line"><aname="l00073"></a><spanclass="lineno"> 73</span> <spanclass="comment">// 4 = nb of bytes per code (d must be a multiple of this)</span></div>
<divclass="line"><aname="l00074"></a><spanclass="lineno"> 74</span> <spanclass="comment">// 8 = nb of bits per sub-code (almost always 8)</span></div>
<divclass="line"><aname="l00075"></a><spanclass="lineno"> 75</span> <aclass="code"href="namespacefaiss.html#afd12191c638da74760ff397cf319752c">faiss::MetricType</a> metric = faiss::METRIC_L2; <spanclass="comment">// can be METRIC_INNER_PRODUCT</span></div>
<divclass="line"><aname="l00076"></a><spanclass="lineno"> 76</span> <aclass="code"href="structfaiss_1_1IndexIVFFlat.html">faiss::IndexIVFFlat</a> index (&coarse_quantizer, d, ncentroids, metric);</div>
<divclass="line"><aname="l00079"></a><spanclass="lineno"> 79</span> <spanclass="comment">// define the number of probes. 2048 is for high-dim, overkill in practice</span></div>
<divclass="line"><aname="l00080"></a><spanclass="lineno"> 80</span> <spanclass="comment">// Use 4-1024 depending on the speed/accuracy trade-off that you want</span></div>
<divclass="line"><aname="l00088"></a><spanclass="lineno"> 88</span>  std::vector <float> trainvecs (nt * d);</div>
<divclass="line"><aname="l00089"></a><spanclass="lineno"> 89</span> <spanclass="keywordflow">for</span> (<spanclass="keywordtype">size_t</span> i = 0; i < nt * d; i++) {</div>
<divclass="line"><aname="l00101"></a><spanclass="lineno"> 101</span>  { <spanclass="comment">// populating the database</span></div>
<divclass="line"><aname="l00102"></a><spanclass="lineno"> 102</span>  printf (<spanclass="stringliteral">"[%.3f s] Building a dataset of %ld vectors to index\n"</span>,</div>
<divclass="line"><aname="l00106"></a><spanclass="lineno"> 106</span> <spanclass="keywordflow">for</span> (<spanclass="keywordtype">size_t</span> i = 0; i < nb * d; i++) {</div>
<divclass="line"><aname="l00110"></a><spanclass="lineno"> 110</span>  printf (<spanclass="stringliteral">"[%.3f s] Adding the vectors to the index\n"</span>, elapsed() - t0);</div>
<divclass="line"><aname="l00114"></a><spanclass="lineno"> 114</span> <spanclass="comment">// remember a few elements from the database as queries</span></div>
<divclass="line"><aname="l00130"></a><spanclass="lineno"> 130</span> <spanclass="stringliteral">"of %ld vectors in the index\n"</span>,</div>
<divclass="ttc"id="namespacefaiss_html_afd12191c638da74760ff397cf319752c"><divclass="ttname"><ahref="namespacefaiss.html#afd12191c638da74760ff397cf319752c">faiss::MetricType</a></div><divclass="ttdeci">MetricType</div><divclass="ttdoc">Some algorithms support both an inner product version and an L2 search version. </div><divclass="ttdef"><b>Definition:</b><ahref="Index_8h_source.html#l00044">Index.h:44</a></div></div>
</div><!-- fragment --></div><!-- contents -->
<!-- start footer part -->
<hrclass="footer"/><addressclass="footer"><small>
Generated by  <ahref="http://www.doxygen.org/index.html">