<!-- 2017-02-23 06:26:44 +08:00 -->
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
<html xmlns="http://www.w3.org/1999/xhtml">
<head>
<meta http-equiv="Content-Type" content="text/html;charset=UTF-8"/>
<meta http-equiv="X-UA-Compatible" content="IE=9"/>
<meta name="generator" content="Doxygen 1.8.5"/>
<!-- 2017-03-01 17:50:47 +08:00 -->
<title>Faiss: L2Norm.cu Source File</title>
<!-- 2017-02-23 06:26:44 +08:00 -->
<link href="tabs.css" rel="stylesheet" type="text/css"/>
<script type="text/javascript" src="jquery.js"></script>
<script type="text/javascript" src="dynsections.js"></script>
<link href="search/search.css" rel="stylesheet" type="text/css"/>
<script type="text/javascript" src="search/search.js"></script>
<script type="text/javascript">
  $(document).ready(function() { searchBox.OnSelectItem(0); });
</script>
<link href="doxygen.css" rel="stylesheet" type="text/css"/>
</head>
<body>
<div id="top"><!-- do not remove this div, it is closed by doxygen! -->
<div id="titlearea">
<table cellspacing="0" cellpadding="0">
 <tbody>
 <tr style="height: 56px;">
  <td style="padding-left: 0.5em;">
   <div id="projectname">Faiss
   </div>
  </td>
 </tr>
 </tbody>
</table>
</div>
<!-- end header part -->
<!-- Generated by Doxygen 1.8.5 -->
<script type="text/javascript">
var searchBox = new SearchBox("searchBox", "search",false,'Search');
</script>
<div id="navrow1" class="tabs">
  <ul class="tablist">
    <li><a href="index.html"><span>Main&#160;Page</span></a></li>
    <li><a href="namespaces.html"><span>Namespaces</span></a></li>
    <li><a href="annotated.html"><span>Classes</span></a></li>
    <li class="current"><a href="files.html"><span>Files</span></a></li>
    <li>
      <div id="MSearchBox" class="MSearchBoxInactive">
        <span class="left">
          <img id="MSearchSelect" src="search/mag_sel.png"
               onmouseover="return searchBox.OnSearchSelectShow()"
               onmouseout="return searchBox.OnSearchSelectHide()"
               alt=""/>
          <input type="text" id="MSearchField" value="Search" accesskey="S"
               onfocus="searchBox.OnSearchFieldFocus(true)"
               onblur="searchBox.OnSearchFieldFocus(false)"
               onkeyup="searchBox.OnSearchFieldChange(event)"/>
          </span><span class="right">
            <a id="MSearchClose" href="javascript:searchBox.CloseResultsWindow()"><img id="MSearchCloseImg" border="0" src="search/close.png" alt=""/></a>
          </span>
      </div>
    </li>
  </ul>
</div>
<div id="navrow2" class="tabs2">
  <ul class="tablist">
    <li><a href="files.html"><span>File&#160;List</span></a></li>
  </ul>
</div>
<!-- window showing the filter options -->
<div id="MSearchSelectWindow"
     onmouseover="return searchBox.OnSearchSelectShow()"
     onmouseout="return searchBox.OnSearchSelectHide()"
     onkeydown="return searchBox.OnSearchSelectKey(event)">
<a class="SelectItem" href="javascript:void(0)" onclick="searchBox.OnSelectItem(0)"><span class="SelectionMark">&#160;</span>All</a><a class="SelectItem" href="javascript:void(0)" onclick="searchBox.OnSelectItem(1)"><span class="SelectionMark">&#160;</span>Classes</a><a class="SelectItem" href="javascript:void(0)" onclick="searchBox.OnSelectItem(2)"><span class="SelectionMark">&#160;</span>Namespaces</a><a class="SelectItem" href="javascript:void(0)" onclick="searchBox.OnSelectItem(3)"><span class="SelectionMark">&#160;</span>Functions</a><a class="SelectItem" href="javascript:void(0)" onclick="searchBox.OnSelectItem(4)"><span class="SelectionMark">&#160;</span>Variables</a><a class="SelectItem" href="javascript:void(0)" onclick="searchBox.OnSelectItem(5)"><span class="SelectionMark">&#160;</span>Typedefs</a><a class="SelectItem" href="javascript:void(0)" onclick="searchBox.OnSelectItem(6)"><span class="SelectionMark">&#160;</span>Enumerations</a><a class="SelectItem" href="javascript:void(0)" onclick="searchBox.OnSelectItem(7)"><span class="SelectionMark">&#160;</span>Enumerator</a><a class="SelectItem" href="javascript:void(0)" onclick="searchBox.OnSelectItem(8)"><span class="SelectionMark">&#160;</span>Friends</a></div>
<!-- iframe showing the search results (closed by default) -->
<div id="MSearchResultsWindow">
<iframe src="javascript:void(0)" frameborder="0"
        name="MSearchResults" id="MSearchResults">
</iframe>
</div>
<div id="nav-path" class="navpath">
  <ul>
<li class="navelem"><a class="el" href="dir_6b3ae6988449b0834e9596fad5d75199.html">gpu</a></li><li class="navelem"><a class="el" href="dir_49d1182a3b8dfb62757c53ae905481ad.html">impl</a></li>  </ul>
</div>
</div><!-- top -->
<div class="header">
  <div class="headertitle">
<div class="title">L2Norm.cu</div>  </div>
</div><!-- header -->
<div class="contents">
< div class = "fragment" > < div class = "line" > < a name = "l00001" > < / a > < span class = "lineno" > 1< / span >   < span class = "comment" > < / span > < / div >
< div class = "line" > < a name = "l00002" > < / a > < span class = "lineno" > 2< / span >   < span class = "comment" > /**< / span > < / div >
< div class = "line" > < a name = "l00003" > < / a > < span class = "lineno" > 3< / span >   < span class = "comment" > * Copyright (c) 2015-present, Facebook, Inc.< / span > < / div >
< div class = "line" > < a name = "l00004" > < / a > < span class = "lineno" > 4< / span >   < span class = "comment" > * All rights reserved.< / span > < / div >
< div class = "line" > < a name = "l00005" > < / a > < span class = "lineno" > 5< / span >   < span class = "comment" > *< / span > < / div >
< div class = "line" > < a name = "l00006" > < / a > < span class = "lineno" > 6< / span >   < span class = "comment" > * This source code is licensed under the CC-by-NC license found in the< / span > < / div >
< div class = "line" > < a name = "l00007" > < / a > < span class = "lineno" > 7< / span >   < span class = "comment" > * LICENSE file in the root directory of this source tree.< / span > < / div >
< div class = "line" > < a name = "l00008" > < / a > < span class = "lineno" > 8< / span >   < span class = "comment" > */< / span > < / div >
< div class = "line" > < a name = "l00009" > < / a > < span class = "lineno" > 9< / span >   < / div >
< div class = "line" > < a name = "l00010" > < / a > < span class = "lineno" > 10< / span >   < span class = "comment" > // Copyright 2004-present Facebook. All Rights Reserved.< / span > < / div >
< div class = "line" > < a name = "l00011" > < / a > < span class = "lineno" > 11< / span >   < / div >
<div class="line"><a name="l00012"></a><span class="lineno">   12</span>&#160;<span class="preprocessor">#include &quot;L2Norm.cuh&quot;</span></div>
<div class="line"><a name="l00013"></a><span class="lineno">   13</span>&#160;<span class="preprocessor">#include &quot;../../FaissAssert.h&quot;</span></div>
<div class="line"><a name="l00014"></a><span class="lineno">   14</span>&#160;<span class="preprocessor">#include &quot;../utils/ConversionOperators.cuh&quot;</span></div>
<div class="line"><a name="l00015"></a><span class="lineno">   15</span>&#160;<span class="preprocessor">#include &quot;../utils/DeviceDefs.cuh&quot;</span></div>
<div class="line"><a name="l00016"></a><span class="lineno">   16</span>&#160;<span class="preprocessor">#include &quot;../utils/DeviceUtils.h&quot;</span></div>
<div class="line"><a name="l00017"></a><span class="lineno">   17</span>&#160;<span class="preprocessor">#include &quot;../utils/Float16.cuh&quot;</span></div>
<div class="line"><a name="l00018"></a><span class="lineno">   18</span>&#160;<span class="preprocessor">#include &quot;../utils/MathOperators.cuh&quot;</span></div>
<div class="line"><a name="l00019"></a><span class="lineno">   19</span>&#160;<span class="preprocessor">#include &quot;../utils/PtxUtils.cuh&quot;</span></div>
<div class="line"><a name="l00020"></a><span class="lineno">   20</span>&#160;<span class="preprocessor">#include &quot;../utils/StaticUtils.h&quot;</span></div>
<div class="line"><a name="l00021"></a><span class="lineno">   21</span>&#160;<span class="preprocessor">#include &quot;../utils/Reductions.cuh&quot;</span></div>
< div class = "line" > < a name = "l00022" > < / a > < span class = "lineno" > 22< / span >   < / div >
< div class = "line" > < a name = "l00023" > < / a > < span class = "lineno" > 23< / span >   < span class = "keyword" > namespace < / span > faiss { < span class = "keyword" > namespace < / span > gpu {< / div >
< div class = "line" > < a name = "l00024" > < / a > < span class = "lineno" > 24< / span >   < / div >
< div class = "line" > < a name = "l00025" > < / a > < span class = "lineno" > 25< / span >   < span class = "comment" > // Input: (batch x dim), # repeats< / span > < / div >
< div class = "line" > < a name = "l00026" > < / a > < span class = "lineno" > 26< / span >   < span class = "comment" > // Output: (# repeats, norm of batch vector)< / span > < / div >
< div class = "line" > < a name = "l00027" > < / a > < span class = "lineno" > 27< / span >   < span class = "comment" > // Done under the presumption that the dimension size is not too large< / span > < / div >
<div class="line"><a name="l00028"></a><span class="lineno">   28</span>&#160;<span class="comment">// (&lt; 10k or so), since there wouldn&#39;t be enough parallelism applying a</span></div>
< div class = "line" > < a name = "l00029" > < / a > < span class = "lineno" > 29< / span >   < span class = "comment" > // single block to the problem. Also that each vector is large enough< / span > < / div >
<div class="line"><a name="l00030"></a><span class="lineno">   30</span>&#160;<span class="comment">// (&gt; 64), since a single block works on multiple rows&#39; norms at the</span></div>
< div class = "line" > < a name = "l00031" > < / a > < span class = "lineno" > 31< / span >   < span class = "comment" > // same time.< / span > < / div >
< div class = "line" > < a name = "l00032" > < / a > < span class = "lineno" > 32< / span >   < span class = "comment" > // T: the type we are doing the math in (e.g., float, half)< / span > < / div >
< div class = "line" > < a name = "l00033" > < / a > < span class = "lineno" > 33< / span >   < span class = "comment" > // TVec: the potentially vectorized type we are loading in (e.g.,< / span > < / div >
< div class = "line" > < a name = "l00034" > < / a > < span class = "lineno" > 34< / span >   < span class = "comment" > // float4, half2)< / span > < / div >
< div class = "line" > < a name = "l00035" > < / a > < span class = "lineno" > 35< / span >   < span class = "keyword" > template< / span > < < span class = "keyword" > typename< / span > T, < span class = "keyword" > typename< / span > TVec,< / div >
< div class = "line" > < a name = "l00036" > < / a > < span class = "lineno" > 36< / span >   < span class = "keywordtype" > int< / span > RowTileSize, < span class = "keywordtype" > bool< / span > NormLoop, < span class = "keywordtype" > bool< / span > NormSquared> < / div >
< div class = "line" > < a name = "l00037" > < / a > < span class = "lineno" > 37< / span >   __global__ < span class = "keywordtype" > void< / span > l2Norm(Tensor< TVec, 2, true> input,< / div >
< div class = "line" > < a name = "l00038" > < / a > < span class = "lineno" > 38< / span >   Tensor< T, 1, true> output) {< / div >
< div class = "line" > < a name = "l00039" > < / a > < span class = "lineno" > 39< / span >   < span class = "keyword" > extern< / span > __shared__ < span class = "keywordtype" > char< / span > smemByte[]; < span class = "comment" > // #warps * RowTileSize elements< / span > < / div >
< div class = "line" > < a name = "l00040" > < / a > < span class = "lineno" > 40< / span >   T* smem = (T*) smemByte;< / div >
< div class = "line" > < a name = "l00041" > < / a > < span class = "lineno" > 41< / span >   < / div >
< div class = "line" > < a name = "l00042" > < / a > < span class = "lineno" > 42< / span >   < span class = "keywordtype" > int< / span > numWarps = utils::divUp(blockDim.x, kWarpSize);< / div >
< div class = "line" > < a name = "l00043" > < / a > < span class = "lineno" > 43< / span >   < span class = "keywordtype" > int< / span > laneId = getLaneId();< / div >
< div class = "line" > < a name = "l00044" > < / a > < span class = "lineno" > 44< / span >   < span class = "keywordtype" > int< / span > warpId = threadIdx.x / kWarpSize;< / div >
< div class = "line" > < a name = "l00045" > < / a > < span class = "lineno" > 45< / span >   < / div >
< div class = "line" > < a name = "l00046" > < / a > < span class = "lineno" > 46< / span >   < span class = "keywordtype" > bool< / span > lastRowTile = (blockIdx.x == (gridDim.x - 1));< / div >
< div class = "line" > < a name = "l00047" > < / a > < span class = "lineno" > 47< / span >   < span class = "keywordtype" > int< / span > rowStart = RowTileSize * blockIdx.x;< / div >
< div class = "line" > < a name = "l00048" > < / a > < span class = "lineno" > 48< / span >   T rowNorm[RowTileSize];< / div >
< div class = "line" > < a name = "l00049" > < / a > < span class = "lineno" > 49< / span >   < / div >
< div class = "line" > < a name = "l00050" > < / a > < span class = "lineno" > 50< / span >   < span class = "keywordflow" > if< / span > (lastRowTile) {< / div >
< div class = "line" > < a name = "l00051" > < / a > < span class = "lineno" > 51< / span >   < span class = "comment" > // We are handling the very end of the input matrix rows< / span > < / div >
< div class = "line" > < a name = "l00052" > < / a > < span class = "lineno" > 52< / span >   < span class = "keywordflow" > for< / span > (< span class = "keywordtype" > int< / span > row = 0; row < input.getSize(0) - rowStart; ++row) {< / div >
< div class = "line" > < a name = "l00053" > < / a > < span class = "lineno" > 53< / span >   < span class = "keywordflow" > if< / span > (NormLoop) {< / div >
< div class = "line" > < a name = "l00054" > < / a > < span class = "lineno" > 54< / span >   rowNorm[0] = Math< T> ::zero();< / div >
< div class = "line" > < a name = "l00055" > < / a > < span class = "lineno" > 55< / span >   < / div >
< div class = "line" > < a name = "l00056" > < / a > < span class = "lineno" > 56< / span >   < span class = "keywordflow" > for< / span > (< span class = "keywordtype" > int< / span > col = threadIdx.x; col < input.getSize(1); col += blockDim.x) {< / div >
< div class = "line" > < a name = "l00057" > < / a > < span class = "lineno" > 57< / span >   TVec val = input[rowStart + row][col];< / div >
< div class = "line" > < a name = "l00058" > < / a > < span class = "lineno" > 58< / span >   val = Math< TVec> ::mul(val, val);< / div >
< div class = "line" > < a name = "l00059" > < / a > < span class = "lineno" > 59< / span >   rowNorm[0] = Math< T> ::add(rowNorm[0], Math< TVec> ::reduceAdd(val));< / div >
< div class = "line" > < a name = "l00060" > < / a > < span class = "lineno" > 60< / span >   }< / div >
< div class = "line" > < a name = "l00061" > < / a > < span class = "lineno" > 61< / span >   } < span class = "keywordflow" > else< / span > {< / div >
< div class = "line" > < a name = "l00062" > < / a > < span class = "lineno" > 62< / span >   TVec val = input[rowStart + row][threadIdx.x];< / div >
< div class = "line" > < a name = "l00063" > < / a > < span class = "lineno" > 63< / span >   val = Math< TVec> ::mul(val, val);< / div >
< div class = "line" > < a name = "l00064" > < / a > < span class = "lineno" > 64< / span >   rowNorm[0] = < a class = "code" href = "structfaiss_1_1gpu_1_1Math.html#a4b17f0b5d014f300e76dde5b24af8014" > Math< TVec> ::reduceAdd< / a > (val);< / div >
< div class = "line" > < a name = "l00065" > < / a > < span class = "lineno" > 65< / span >   }< / div >
< div class = "line" > < a name = "l00066" > < / a > < span class = "lineno" > 66< / span >   < / div >
< div class = "line" > < a name = "l00067" > < / a > < span class = "lineno" > 67< / span >   rowNorm[0] = warpReduceAllSum(rowNorm[0]);< / div >
< div class = "line" > < a name = "l00068" > < / a > < span class = "lineno" > 68< / span >   < span class = "keywordflow" > if< / span > (laneId == 0) {< / div >
< div class = "line" > < a name = "l00069" > < / a > < span class = "lineno" > 69< / span >   smem[row * numWarps + warpId] = rowNorm[0];< / div >
< div class = "line" > < a name = "l00070" > < / a > < span class = "lineno" > 70< / span >   }< / div >
< div class = "line" > < a name = "l00071" > < / a > < span class = "lineno" > 71< / span >   }< / div >
< div class = "line" > < a name = "l00072" > < / a > < span class = "lineno" > 72< / span >   } < span class = "keywordflow" > else< / span > {< / div >
< div class = "line" > < a name = "l00073" > < / a > < span class = "lineno" > 73< / span >   < span class = "comment" > // We are guaranteed that all RowTileSize rows are available in< / span > < / div >
< div class = "line" > < a name = "l00074" > < / a > < span class = "lineno" > 74< / span >   < span class = "comment" > // [rowStart, rowStart + RowTileSize)< / span > < / div >
< div class = "line" > < a name = "l00075" > < / a > < span class = "lineno" > 75< / span >   < / div >
< div class = "line" > < a name = "l00076" > < / a > < span class = "lineno" > 76< / span >   < span class = "keywordflow" > if< / span > (NormLoop) {< / div >
< div class = "line" > < a name = "l00077" > < / a > < span class = "lineno" > 77< / span >   < span class = "comment" > // A single block of threads is not big enough to span each< / span > < / div >
< div class = "line" > < a name = "l00078" > < / a > < span class = "lineno" > 78< / span >   < span class = "comment" > // vector< / span > < / div >
< div class = "line" > < a name = "l00079" > < / a > < span class = "lineno" > 79< / span >   TVec tmp[RowTileSize];< / div >
< div class = "line" > < a name = "l00080" > < / a > < span class = "lineno" > 80< / span >   < / div >
< div class = "line" > < a name = "l00081" > < / a > < span class = "lineno" > 81< / span >   < span class = "preprocessor" > #pragma unroll< / span > < / div >
< div class = "line" > < a name = "l00082" > < / a > < span class = "lineno" > 82< / span >   < span class = "preprocessor" > < / span > < span class = "keywordflow" > for< / span > (< span class = "keywordtype" > int< / span > row = 0; row < RowTileSize; ++row) {< / div >
< div class = "line" > < a name = "l00083" > < / a > < span class = "lineno" > 83< / span >   rowNorm[row] = Math< T> ::zero();< / div >
< div class = "line" > < a name = "l00084" > < / a > < span class = "lineno" > 84< / span >   }< / div >
< div class = "line" > < a name = "l00085" > < / a > < span class = "lineno" > 85< / span >   < / div >
< div class = "line" > < a name = "l00086" > < / a > < span class = "lineno" > 86< / span >   < span class = "keywordflow" > for< / span > (< span class = "keywordtype" > int< / span > col = threadIdx.x; col < input.getSize(1); col += blockDim.x) {< / div >
< div class = "line" > < a name = "l00087" > < / a > < span class = "lineno" > 87< / span >   < span class = "preprocessor" > #pragma unroll< / span > < / div >
< div class = "line" > < a name = "l00088" > < / a > < span class = "lineno" > 88< / span >   < span class = "preprocessor" > < / span > < span class = "keywordflow" > for< / span > (< span class = "keywordtype" > int< / span > row = 0; row < RowTileSize; ++row) {< / div >
< div class = "line" > < a name = "l00089" > < / a > < span class = "lineno" > 89< / span >   tmp[row] = input[rowStart + row][col];< / div >
< div class = "line" > < a name = "l00090" > < / a > < span class = "lineno" > 90< / span >   }< / div >
< div class = "line" > < a name = "l00091" > < / a > < span class = "lineno" > 91< / span >   < / div >
< div class = "line" > < a name = "l00092" > < / a > < span class = "lineno" > 92< / span >   < span class = "preprocessor" > #pragma unroll< / span > < / div >
< div class = "line" > < a name = "l00093" > < / a > < span class = "lineno" > 93< / span >   < span class = "preprocessor" > < / span > < span class = "keywordflow" > for< / span > (< span class = "keywordtype" > int< / span > row = 0; row < RowTileSize; ++row) {< / div >
< div class = "line" > < a name = "l00094" > < / a > < span class = "lineno" > 94< / span >   tmp[row] = Math< TVec> ::mul(tmp[row], tmp[row]);< / div >
< div class = "line" > < a name = "l00095" > < / a > < span class = "lineno" > 95< / span >   }< / div >
< div class = "line" > < a name = "l00096" > < / a > < span class = "lineno" > 96< / span >   < / div >
< div class = "line" > < a name = "l00097" > < / a > < span class = "lineno" > 97< / span >   < span class = "preprocessor" > #pragma unroll< / span > < / div >
< div class = "line" > < a name = "l00098" > < / a > < span class = "lineno" > 98< / span >   < span class = "preprocessor" > < / span > < span class = "keywordflow" > for< / span > (< span class = "keywordtype" > int< / span > row = 0; row < RowTileSize; ++row) {< / div >
< div class = "line" > < a name = "l00099" > < / a > < span class = "lineno" > 99< / span >   rowNorm[row] = Math< T> ::add(rowNorm[row],< / div >
< div class = "line" > < a name = "l00100" > < / a > < span class = "lineno" > 100< / span >   Math< TVec> ::reduceAdd(tmp[row]));< / div >
< div class = "line" > < a name = "l00101" > < / a > < span class = "lineno" > 101< / span >   }< / div >
< div class = "line" > < a name = "l00102" > < / a > < span class = "lineno" > 102< / span >   }< / div >
< div class = "line" > < a name = "l00103" > < / a > < span class = "lineno" > 103< / span >   } < span class = "keywordflow" > else< / span > {< / div >
< div class = "line" > < a name = "l00104" > < / a > < span class = "lineno" > 104< / span >   TVec tmp[RowTileSize];< / div >
< div class = "line" > < a name = "l00105" > < / a > < span class = "lineno" > 105< / span >   < / div >
< div class = "line" > < a name = "l00106" > < / a > < span class = "lineno" > 106< / span >   < span class = "comment" > // A block of threads is the exact size of the vector< / span > < / div >
< div class = "line" > < a name = "l00107" > < / a > < span class = "lineno" > 107< / span >   < span class = "preprocessor" > #pragma unroll< / span > < / div >
< div class = "line" > < a name = "l00108" > < / a > < span class = "lineno" > 108< / span >   < span class = "preprocessor" > < / span > < span class = "keywordflow" > for< / span > (< span class = "keywordtype" > int< / span > row = 0; row < RowTileSize; ++row) {< / div >
< div class = "line" > < a name = "l00109" > < / a > < span class = "lineno" > 109< / span >   tmp[row] = input[rowStart + row][threadIdx.x];< / div >
< div class = "line" > < a name = "l00110" > < / a > < span class = "lineno" > 110< / span >   }< / div >
< div class = "line" > < a name = "l00111" > < / a > < span class = "lineno" > 111< / span >   < / div >
< div class = "line" > < a name = "l00112" > < / a > < span class = "lineno" > 112< / span >   < span class = "preprocessor" > #pragma unroll< / span > < / div >
< div class = "line" > < a name = "l00113" > < / a > < span class = "lineno" > 113< / span >   < span class = "preprocessor" > < / span > < span class = "keywordflow" > for< / span > (< span class = "keywordtype" > int< / span > row = 0; row < RowTileSize; ++row) {< / div >
< div class = "line" > < a name = "l00114" > < / a > < span class = "lineno" > 114< / span >   tmp[row] = Math< TVec> ::mul(tmp[row], tmp[row]);< / div >
< div class = "line" > < a name = "l00115" > < / a > < span class = "lineno" > 115< / span >   }< / div >
< div class = "line" > < a name = "l00116" > < / a > < span class = "lineno" > 116< / span >   < / div >
< div class = "line" > < a name = "l00117" > < / a > < span class = "lineno" > 117< / span >   < span class = "preprocessor" > #pragma unroll< / span > < / div >
< div class = "line" > < a name = "l00118" > < / a > < span class = "lineno" > 118< / span >   < span class = "preprocessor" > < / span > < span class = "keywordflow" > for< / span > (< span class = "keywordtype" > int< / span > row = 0; row < RowTileSize; ++row) {< / div >
< div class = "line" > < a name = "l00119" > < / a > < span class = "lineno" > 119< / span >   rowNorm[row] = < a class = "code" href = "structfaiss_1_1gpu_1_1Math.html#a4b17f0b5d014f300e76dde5b24af8014" > Math< TVec> ::reduceAdd< / a > (tmp[row]);< / div >
< div class = "line" > < a name = "l00120" > < / a > < span class = "lineno" > 120< / span >   }< / div >
< div class = "line" > < a name = "l00121" > < / a > < span class = "lineno" > 121< / span >   }< / div >
< div class = "line" > < a name = "l00122" > < / a > < span class = "lineno" > 122< / span >   < / div >
< div class = "line" > < a name = "l00123" > < / a > < span class = "lineno" > 123< / span >   < span class = "comment" > // Sum up all parts in each warp< / span > < / div >
< div class = "line" > < a name = "l00124" > < / a > < span class = "lineno" > 124< / span >   < span class = "preprocessor" > #pragma unroll< / span > < / div >
< div class = "line" > < a name = "l00125" > < / a > < span class = "lineno" > 125< / span >   < span class = "preprocessor" > < / span > < span class = "keywordflow" > for< / span > (< span class = "keywordtype" > int< / span > row = 0; row < RowTileSize; ++row) {< / div >
< div class = "line" > < a name = "l00126" > < / a > < span class = "lineno" > 126< / span >   rowNorm[row] = warpReduceAllSum(rowNorm[row]);< / div >
< div class = "line" > < a name = "l00127" > < / a > < span class = "lineno" > 127< / span >   }< / div >
< div class = "line" > < a name = "l00128" > < / a > < span class = "lineno" > 128< / span >   < / div >
< div class = "line" > < a name = "l00129" > < / a > < span class = "lineno" > 129< / span >   < span class = "keywordflow" > if< / span > (laneId == 0) {< / div >
< div class = "line" > < a name = "l00130" > < / a > < span class = "lineno" > 130< / span >   < span class = "preprocessor" > #pragma unroll< / span > < / div >
< div class = "line" > < a name = "l00131" > < / a > < span class = "lineno" > 131< / span >   < span class = "preprocessor" > < / span > < span class = "keywordflow" > for< / span > (< span class = "keywordtype" > int< / span > row = 0; row < RowTileSize; ++row) {< / div >
< div class = "line" > < a name = "l00132" > < / a > < span class = "lineno" > 132< / span >   smem[row * numWarps + warpId] = rowNorm[row];< / div >
< div class = "line" > < a name = "l00133" > < / a > < span class = "lineno" > 133< / span >   }< / div >
< div class = "line" > < a name = "l00134" > < / a > < span class = "lineno" > 134< / span >   }< / div >
< div class = "line" > < a name = "l00135" > < / a > < span class = "lineno" > 135< / span >   }< / div >
< div class = "line" > < a name = "l00136" > < / a > < span class = "lineno" > 136< / span >   < / div >
< div class = "line" > < a name = "l00137" > < / a > < span class = "lineno" > 137< / span >   __syncthreads();< / div >
< div class = "line" > < a name = "l00138" > < / a > < span class = "lineno" > 138< / span >   < / div >
< div class = "line" > < a name = "l00139" > < / a > < span class = "lineno" > 139< / span >   < span class = "comment" > // Sum across warps< / span > < / div >
< div class = "line" > < a name = "l00140" > < / a > < span class = "lineno" > 140< / span >   < span class = "keywordflow" > if< / span > (warpId == 0) {< / div >
< div class = "line" > < a name = "l00141" > < / a > < span class = "lineno" > 141< / span >   < span class = "preprocessor" > #pragma unroll< / span > < / div >
< div class = "line" > < a name = "l00142" > < / a > < span class = "lineno" > 142< / span >   < span class = "preprocessor" > < / span > < span class = "keywordflow" > for< / span > (< span class = "keywordtype" > int< / span > row = 0; row < RowTileSize; ++row) {< / div >
< div class = "line" > < a name = "l00143" > < / a > < span class = "lineno" > 143< / span >   rowNorm[row] = laneId < numWarps ?< / div >
< div class = "line" > < a name = "l00144" > < / a > < span class = "lineno" > 144< / span >   smem[row * numWarps + laneId] : Math< T> ::zero();< / div >
< div class = "line" > < a name = "l00145" > < / a > < span class = "lineno" > 145< / span >   }< / div >
< div class = "line" > < a name = "l00146" > < / a > < span class = "lineno" > 146< / span >   < / div >
< div class = "line" > < a name = "l00147" > < / a > < span class = "lineno" > 147< / span >   < span class = "preprocessor" > #pragma unroll< / span > < / div >
< div class = "line" > < a name = "l00148" > < / a > < span class = "lineno" > 148< / span >   < span class = "preprocessor" > < / span > < span class = "keywordflow" > for< / span > (< span class = "keywordtype" > int< / span > row = 0; row < RowTileSize; ++row) {< / div >
< div class = "line" > < a name = "l00149" > < / a > < span class = "lineno" > 149< / span >   rowNorm[row] = warpReduceAllSum(rowNorm[row]);< / div >
< div class = "line" > < a name = "l00150" > < / a > < span class = "lineno" > 150< / span >   }< / div >
< div class = "line" > < a name = "l00151" > < / a > < span class = "lineno" > 151< / span >   < / div >
< div class = "line" > < a name = "l00152" > < / a > < span class = "lineno" > 152< / span >   < span class = "comment" > // Write out answer< / span > < / div >
< div class = "line" > < a name = "l00153" > < / a > < span class = "lineno" > 153< / span >   < span class = "keywordflow" > if< / span > (laneId == 0) {< / div >
< div class = "line" > < a name = "l00154" > < / a > < span class = "lineno" > 154< / span >   < span class = "preprocessor" > #pragma unroll< / span > < / div >
< div class = "line" > < a name = "l00155" > < / a > < span class = "lineno" > 155< / span >   < span class = "preprocessor" > < / span > < span class = "keywordflow" > for< / span > (< span class = "keywordtype" > int< / span > row = 0; row < RowTileSize; ++row) {< / div >
< div class = "line" > < a name = "l00156" > < / a > < span class = "lineno" > 156< / span >   < span class = "keywordtype" > int< / span > outCol = rowStart + row;< / div >
< div class = "line" > < a name = "l00157" > < / a > < span class = "lineno" > 157< / span >   < / div >
< div class = "line" > < a name = "l00158" > < / a > < span class = "lineno" > 158< / span >   < span class = "keywordflow" > if< / span > (lastRowTile) {< / div >
< div class = "line" > < a name = "l00159" > < / a > < span class = "lineno" > 159< / span >   < span class = "keywordflow" > if< / span > (outCol < output.getSize(0)) {< / div >
< div class = "line" > < a name = "l00160" > < / a > < span class = "lineno" > 160< / span >   output[outCol] =< / div >
< div class = "line" > < a name = "l00161" > < / a > < span class = "lineno" > 161< / span >   NormSquared ? rowNorm[row] :< / div >
< div class = "line" > < a name = "l00162" > < / a > < span class = "lineno" > 162< / span >   ConvertTo< T> ::to(< / div >
< div class = "line" > < a name = "l00163" > < / a > < span class = "lineno" > 163< / span >   sqrtf(ConvertTo< float> ::to(rowNorm[row])));< / div >
< div class = "line" > < a name = "l00164" > < / a > < span class = "lineno" > 164< / span >   }< / div >
< div class = "line" > < a name = "l00165" > < / a > < span class = "lineno" > 165< / span >   } < span class = "keywordflow" > else< / span > {< / div >
< div class = "line" > < a name = "l00166" > < / a > < span class = "lineno" > 166< / span >   output[outCol] =< / div >
< div class = "line" > < a name = "l00167" > < / a > < span class = "lineno" > 167< / span >   NormSquared ? rowNorm[row] :< / div >
< div class = "line" > < a name = "l00168" > < / a > < span class = "lineno" > 168< / span >   ConvertTo< T> ::to(< / div >
< div class = "line" > < a name = "l00169" > < / a > < span class = "lineno" > 169< / span >   sqrtf(ConvertTo< float> ::to(rowNorm[row])));< / div >
< div class = "line" > < a name = "l00170" > < / a > < span class = "lineno" > 170< / span >   }< / div >
< div class = "line" > < a name = "l00171" > < / a > < span class = "lineno" > 171< / span >   }< / div >
< div class = "line" > < a name = "l00172" > < / a > < span class = "lineno" > 172< / span >   }< / div >
< div class = "line" > < a name = "l00173" > < / a > < span class = "lineno" > 173< / span >   }< / div >
< div class = "line" > < a name = "l00174" > < / a > < span class = "lineno" > 174< / span >   }< / div >
< div class = "line" > < a name = "l00175" > < / a > < span class = "lineno" > 175< / span >   < / div >
< div class = "line" > < a name = "l00176" > < / a > < span class = "lineno" > 176< / span >   < span class = "keyword" > template< / span > < < span class = "keyword" > typename< / span > T, < span class = "keyword" > typename< / span > TVec> < / div >
< div class = "line" > < a name = "l00177" > < / a > < span class = "lineno" > 177< / span >   < span class = "keywordtype" > void< / span > runL2Norm(Tensor< T, 2, true> & input,< / div >
< div class = "line" > < a name = "l00178" > < / a > < span class = "lineno" > 178< / span >   Tensor< T, 1, true> & output,< / div >
< div class = "line" > < a name = "l00179" > < / a > < span class = "lineno" > 179< / span >   < span class = "keywordtype" > bool< / span > normSquared,< / div >
< div class = "line" > < a name = "l00180" > < / a > < span class = "lineno" > 180< / span >   cudaStream_t stream) {< / div >
< div class = "line" > < a name = "l00181" > < / a > < span class = "lineno" > 181< / span >   FAISS_ASSERT(input.getSize(0) == output.getSize(0));< / div >
< div class = "line" > < a name = "l00182" > < / a > < span class = "lineno" > 182< / span >   < / div >
< div class = "line" > < a name = "l00183" > < / a > < span class = "lineno" > 183< / span >   < span class = "keywordtype" > int< / span > maxThreads = getMaxThreadsCurrentDevice();< / div >
< div class = "line" > < a name = "l00184" > < / a > < span class = "lineno" > 184< / span >   constexpr < span class = "keywordtype" > int< / span > rowTileSize = 8;< / div >
< div class = "line" > < a name = "l00185" > < / a > < span class = "lineno" > 185< / span >   < / div >
< div class = "line" > < a name = "l00186" > < / a > < span class = "lineno" > 186< / span >   < span class = "preprocessor" > #define RUN_L2(TYPE_T, TYPE_TVEC, INPUT) \< / span > < / div >
< div class = "line" > < a name = "l00187" > < / a > < span class = "lineno" > 187< / span >   < span class = "preprocessor" > do { \< / span > < / div >
< div class = "line" > < a name = "l00188" > < / a > < span class = "lineno" > 188< / span >   < span class = "preprocessor" > if (normLoop) { \< / span > < / div >
< div class = "line" > < a name = "l00189" > < / a > < span class = "lineno" > 189< / span >   < span class = "preprocessor" > if (normSquared) { \< / span > < / div >
< div class = "line" > < a name = "l00190" > < / a > < span class = "lineno" > 190< / span >   < span class = "preprocessor" > l2Norm< TYPE_T, TYPE_TVEC, rowTileSize, true, true> \< / span > < / div >
< div class = "line" > < a name = "l00191" > < / a > < span class = "lineno" > 191< / span >   < span class = "preprocessor" > &lt;&lt;&lt;grid, block, smem, stream&gt;&gt;&gt;(INPUT, output); \< / span > < / div >
< div class = "line" > < a name = "l00192" > < / a > < span class = "lineno" > 192< / span >   < span class = "preprocessor" > } else { \< / span > < / div >
< div class = "line" > < a name = "l00193" > < / a > < span class = "lineno" > 193< / span >   < span class = "preprocessor" > l2Norm< TYPE_T, TYPE_TVEC, rowTileSize, true, false> \< / span > < / div >
< div class = "line" > < a name = "l00194" > < / a > < span class = "lineno" > 194< / span >   < span class = "preprocessor" > &lt;&lt;&lt;grid, block, smem, stream&gt;&gt;&gt;(INPUT, output); \< / span > < / div >
< div class = "line" > < a name = "l00195" > < / a > < span class = "lineno" > 195< / span >   < span class = "preprocessor" > } \< / span > < / div >
< div class = "line" > < a name = "l00196" > < / a > < span class = "lineno" > 196< / span >   < span class = "preprocessor" > } else { \< / span > < / div >
< div class = "line" > < a name = "l00197" > < / a > < span class = "lineno" > 197< / span >   < span class = "preprocessor" > if (normSquared) { \< / span > < / div >
< div class = "line" > < a name = "l00198" > < / a > < span class = "lineno" > 198< / span >   < span class = "preprocessor" > l2Norm< TYPE_T, TYPE_TVEC, rowTileSize, false, true> \< / span > < / div >
< div class = "line" > < a name = "l00199" > < / a > < span class = "lineno" > 199< / span >   < span class = "preprocessor" > &lt;&lt;&lt;grid, block, smem, stream&gt;&gt;&gt;(INPUT, output); \< / span > < / div >
< div class = "line" > < a name = "l00200" > < / a > < span class = "lineno" > 200< / span >   < span class = "preprocessor" > } else { \< / span > < / div >
< div class = "line" > < a name = "l00201" > < / a > < span class = "lineno" > 201< / span >   < span class = "preprocessor" > l2Norm< TYPE_T, TYPE_TVEC, rowTileSize, false, false> \< / span > < / div >
< div class = "line" > < a name = "l00202" > < / a > < span class = "lineno" > 202< / span >   < span class = "preprocessor" > &lt;&lt;&lt;grid, block, smem, stream&gt;&gt;&gt;(INPUT, output); \< / span > < / div >
< div class = "line" > < a name = "l00203" > < / a > < span class = "lineno" > 203< / span >   < span class = "preprocessor" > } \< / span > < / div >
< div class = "line" > < a name = "l00204" > < / a > < span class = "lineno" > 204< / span >   < span class = "preprocessor" > } \< / span > < / div >
< div class = "line" > < a name = "l00205" > < / a > < span class = "lineno" > 205< / span >   < span class = "preprocessor" > } while (0)< / span > < / div >
< div class = "line" > < a name = "l00206" > < / a > < span class = "lineno" > 206< / span >   < span class = "preprocessor" > < / span > < / div >
< div class = "line" > < a name = "l00207" > < / a > < span class = "lineno" > 207< / span >   < span class = "keywordflow" > if< / span > (input.template canCastResize< TVec> ()) {< / div >
< div class = "line" > < a name = "l00208" > < / a > < span class = "lineno" > 208< / span >   < span class = "comment" > // Can load using the vectorized type< / span > < / div >
< div class = "line" > < a name = "l00209" > < / a > < span class = "lineno" > 209< / span >   < span class = "keyword" > auto< / span > inputV = input.template castResize< TVec> ();< / div >
< div class = "line" > < a name = "l00210" > < / a > < span class = "lineno" > 210< / span >   < / div >
< div class = "line" > < a name = "l00211" > < / a > < span class = "lineno" > 211< / span >   < span class = "keywordtype" > int< / span > dim = inputV.getSize(1);< / div >
< div class = "line" > < a name = "l00212" > < / a > < span class = "lineno" > 212< / span >   < span class = "keywordtype" > bool< / span > normLoop = dim > maxThreads;< / div >
< div class = "line" > < a name = "l00213" > < / a > < span class = "lineno" > 213< / span >   < span class = "keywordtype" > int< / span > numThreads = min(dim, maxThreads);< / div >
< div class = "line" > < a name = "l00214" > < / a > < span class = "lineno" > 214< / span >   < / div >
< div class = "line" > < a name = "l00215" > < / a > < span class = "lineno" > 215< / span >   < span class = "keyword" > auto< / span > grid = dim3(utils::divUp(inputV.getSize(0), rowTileSize));< / div >
< div class = "line" > < a name = "l00216" > < / a > < span class = "lineno" > 216< / span >   < span class = "keyword" > auto< / span > block = dim3(numThreads);< / div >
< div class = "line" > < a name = "l00217" > < / a > < span class = "lineno" > 217< / span >   < / div >
< div class = "line" > < a name = "l00218" > < / a > < span class = "lineno" > 218< / span >   < span class = "keyword" > auto< / span > smem = < span class = "keyword" > sizeof< / span > (T) * rowTileSize * utils::divUp(numThreads, kWarpSize);< / div >
< div class = "line" > < a name = "l00219" > < / a > < span class = "lineno" > 219< / span >   < / div >
< div class = "line" > < a name = "l00220" > < / a > < span class = "lineno" > 220< / span >   RUN_L2(T, TVec, inputV);< / div >
< div class = "line" > < a name = "l00221" > < / a > < span class = "lineno" > 221< / span >   } < span class = "keywordflow" > else< / span > {< / div >
< div class = "line" > < a name = "l00222" > < / a > < span class = "lineno" > 222< / span >   < span class = "comment" > // Can't load using the vectorized type< / span > < / div >
< div class = "line" > < a name = "l00223" > < / a > < span class = "lineno" > 223< / span >   < / div >
< div class = "line" > < a name = "l00224" > < / a > < span class = "lineno" > 224< / span >   < span class = "keywordtype" > int< / span > dim = input.getSize(1);< / div >
< div class = "line" > < a name = "l00225" > < / a > < span class = "lineno" > 225< / span >   < span class = "keywordtype" > bool< / span > normLoop = dim > maxThreads;< / div >
< div class = "line" > < a name = "l00226" > < / a > < span class = "lineno" > 226< / span >   < span class = "keywordtype" > int< / span > numThreads = min(dim, maxThreads);< / div >
< div class = "line" > < a name = "l00227" > < / a > < span class = "lineno" > 227< / span >   < / div >
< div class = "line" > < a name = "l00228" > < / a > < span class = "lineno" > 228< / span >   < span class = "keyword" > auto< / span > grid = dim3(utils::divUp(input.getSize(0), rowTileSize));< / div >
< div class = "line" > < a name = "l00229" > < / a > < span class = "lineno" > 229< / span >   < span class = "keyword" > auto< / span > block = dim3(numThreads);< / div >
< div class = "line" > < a name = "l00230" > < / a > < span class = "lineno" > 230< / span >   < / div >
< div class = "line" > < a name = "l00231" > < / a > < span class = "lineno" > 231< / span >   < span class = "keyword" > auto< / span > smem = < span class = "keyword" > sizeof< / span > (T) * rowTileSize * utils::divUp(numThreads, kWarpSize);< / div >
< div class = "line" > < a name = "l00232" > < / a > < span class = "lineno" > 232< / span >   < / div >
< div class = "line" > < a name = "l00233" > < / a > < span class = "lineno" > 233< / span >   RUN_L2(T, T, input);< / div >
< div class = "line" > < a name = "l00234" > < / a > < span class = "lineno" > 234< / span >   }< / div >
< div class = "line" > < a name = "l00235" > < / a > < span class = "lineno" > 235< / span >   < / div >
< div class = "line" > < a name = "l00236" > < / a > < span class = "lineno" > 236< / span >   < span class = "preprocessor" > #undef RUN_L2< / span > < / div >
< div class = "line" > < a name = "l00237" > < / a > < span class = "lineno" > 237< / span >   < span class = "preprocessor" > < / span > < / div >
< div class = "line" > < a name = "l00238" > < / a > < span class = "lineno" > 238< / span >   CUDA_VERIFY(cudaGetLastError());< / div >
< div class = "line" > < a name = "l00239" > < / a > < span class = "lineno" > 239< / span >   }< / div >
< div class = "line" > < a name = "l00240" > < / a > < span class = "lineno" > 240< / span >   < / div >
< div class = "line" > < a name = "l00241" > < / a > < span class = "lineno" > 241< / span >   < span class = "keywordtype" > void< / span > runL2Norm(Tensor< float, 2, true> & input,< / div >
< div class = "line" > < a name = "l00242" > < / a > < span class = "lineno" > 242< / span >   Tensor< float, 1, true> & output,< / div >
< div class = "line" > < a name = "l00243" > < / a > < span class = "lineno" > 243< / span >   < span class = "keywordtype" > bool< / span > normSquared,< / div >
< div class = "line" > < a name = "l00244" > < / a > < span class = "lineno" > 244< / span >   cudaStream_t stream) {< / div >
< div class = "line" > < a name = "l00245" > < / a > < span class = "lineno" > 245< / span >   runL2Norm< float, float4> (input, output, normSquared, stream);< / div >
< div class = "line" > < a name = "l00246" > < / a > < span class = "lineno" > 246< / span >   }< / div >
< div class = "line" > < a name = "l00247" > < / a > < span class = "lineno" > 247< / span >   < / div >
< div class = "line" > < a name = "l00248" > < / a > < span class = "lineno" > 248< / span >   < span class = "preprocessor" > #ifdef FAISS_USE_FLOAT16< / span > < / div >
< div class = "line" > < a name = "l00249" > < / a > < span class = "lineno" > 249< / span >   < span class = "preprocessor" > < / span > < span class = "keywordtype" > void< / span > runL2Norm(Tensor< half, 2, true> & input,< / div >
< div class = "line" > < a name = "l00250" > < / a > < span class = "lineno" > 250< / span >   Tensor< half, 1, true> & output,< / div >
< div class = "line" > < a name = "l00251" > < / a > < span class = "lineno" > 251< / span >   < span class = "keywordtype" > bool< / span > normSquared,< / div >
< div class = "line" > < a name = "l00252" > < / a > < span class = "lineno" > 252< / span >   cudaStream_t stream) {< / div >
< div class = "line" > < a name = "l00253" > < / a > < span class = "lineno" > 253< / span >   runL2Norm< half, half2> (input, output, normSquared, stream);< / div >
< div class = "line" > < a name = "l00254" > < / a > < span class = "lineno" > 254< / span >   }< / div >
< div class = "line" > < a name = "l00255" > < / a > < span class = "lineno" > 255< / span >   < span class = "preprocessor" > #endif< / span > < / div >
< div class = "line" > < a name = "l00256" > < / a > < span class = "lineno" > 256< / span >   < span class = "preprocessor" > < / span > < / div >
< div class = "line" > < a name = "l00257" > < / a > < span class = "lineno" > 257< / span >   } } < span class = "comment" > // namespace< / span > < / div >
< div class = "ttc" id = "structfaiss_1_1gpu_1_1Math_html_a4b17f0b5d014f300e76dde5b24af8014" > < div class = "ttname" > < a href = "structfaiss_1_1gpu_1_1Math.html#a4b17f0b5d014f300e76dde5b24af8014" > faiss::gpu::Math::reduceAdd< / a > < / div > < div class = "ttdeci" > static __device__ T reduceAdd(T v)< / div > < div class = "ttdoc" > For a vector type, this is a horizontal add, returning sum(v_i) < / div > < div class = "ttdef" > < b > Definition:< / b > < a href = "MathOperators_8cuh_source.html#l00045" > MathOperators.cuh:45< / a > < / div > < / div >
< / div > <!-- fragment --> < / div > <!-- contents -->
<!-- start footer part -->
<hr class="footer"/><address class="footer"><small>
Generated by &#160;<a href="http://www.doxygen.org/index.html">
<img class="footer" src="doxygen.png" alt="doxygen"/>
</a> 1.8.5
</small></address>
< / body >
< / html >