From 92f2391f41300b5c53bf189a7bd9ab6bcfe56a8c Mon Sep 17 00:00:00 2001 From: Jeff Johnson Date: Tue, 13 Oct 2020 11:45:35 -0700 Subject: [PATCH] Remove unused nvidia host fp16 headers/functions Summary: Removes unused host fp16 code, the dependency upon which was removed a while ago. Reviewed By: beauby Differential Revision: D24279982 fbshipit-source-id: 5f6820c41eb387f766b2bed7e70203f5e01f49e9 --- faiss/gpu/CMakeLists.txt | 2 - faiss/gpu/utils/Float16.cu | 35 ------ faiss/gpu/utils/Float16.cuh | 9 +- faiss/gpu/utils/nvidia/fp16_emu.cu | 158 ---------------------------- faiss/gpu/utils/nvidia/fp16_emu.cuh | 111 ------------------- 5 files changed, 6 insertions(+), 309 deletions(-) delete mode 100644 faiss/gpu/utils/Float16.cu delete mode 100644 faiss/gpu/utils/nvidia/fp16_emu.cu delete mode 100644 faiss/gpu/utils/nvidia/fp16_emu.cuh diff --git a/faiss/gpu/CMakeLists.txt b/faiss/gpu/CMakeLists.txt index 5b2a957f8..3399b9e33 100644 --- a/faiss/gpu/CMakeLists.txt +++ b/faiss/gpu/CMakeLists.txt @@ -39,7 +39,6 @@ target_sources(faiss PRIVATE utils/BlockSelectFloat.cu utils/BlockSelectHalf.cu utils/DeviceUtils.cu - utils/Float16.cu utils/StackDeviceMemory.cpp utils/Timer.cpp utils/WarpSelectFloat.cu @@ -66,7 +65,6 @@ target_sources(faiss PRIVATE utils/blockselect/BlockSelectHalfT1024.cu utils/blockselect/BlockSelectHalfT2048.cu utils/blockselect/BlockSelectHalfT512.cu - utils/nvidia/fp16_emu.cu utils/warpselect/WarpSelectFloat1.cu utils/warpselect/WarpSelectFloat128.cu utils/warpselect/WarpSelectFloat256.cu diff --git a/faiss/gpu/utils/Float16.cu b/faiss/gpu/utils/Float16.cu deleted file mode 100644 index bcfa5a7ed..000000000 --- a/faiss/gpu/utils/Float16.cu +++ /dev/null @@ -1,35 +0,0 @@ -/** - * Copyright (c) Facebook, Inc. and its affiliates. - * - * This source code is licensed under the MIT license found in the - * LICENSE file in the root directory of this source tree. - */ - - -#include -#include -#include -#include - -namespace faiss { namespace gpu { - -bool getDeviceSupportsFloat16Math(int device) { - const auto& prop = getDeviceProperties(device); - - return (prop.major >= 6 || - (prop.major == 5 && prop.minor >= 3)); -} - -__half hostFloat2Half(float a) { -#if CUDA_VERSION >= 9000 - __half_raw raw; - raw.x = cpu_float2half_rn(a).x; - return __half(raw); -#else - __half h; - h.x = cpu_float2half_rn(a).x; - return h; -#endif -} - -} } // namespace diff --git a/faiss/gpu/utils/Float16.cuh b/faiss/gpu/utils/Float16.cuh index 83408fbb2..32f87614c 100644 --- a/faiss/gpu/utils/Float16.cuh +++ b/faiss/gpu/utils/Float16.cuh @@ -10,7 +10,7 @@ #include #include -#include +#include // Some compute capabilities have full float16 ALUs. #if __CUDA_ARCH__ >= 530 @@ -63,8 +63,11 @@ struct Half8 { }; /// Returns true if the given device supports native float16 math -bool getDeviceSupportsFloat16Math(int device); +inline bool getDeviceSupportsFloat16Math(int device) { + const auto& prop = getDeviceProperties(device); -__half hostFloat2Half(float v); + return (prop.major >= 6 || + (prop.major == 5 && prop.minor >= 3)); +} } } // namespace diff --git a/faiss/gpu/utils/nvidia/fp16_emu.cu b/faiss/gpu/utils/nvidia/fp16_emu.cu deleted file mode 100644 index 9f5f5ee11..000000000 --- a/faiss/gpu/utils/nvidia/fp16_emu.cu +++ /dev/null @@ -1,158 +0,0 @@ -// from Nvidia cuDNN library samples; modified to compile within faiss - -#include - -namespace faiss { namespace gpu { - -/* - * Copyright 1993-2014 NVIDIA Corporation. All rights reserved. - * - * NOTICE TO LICENSEE: - * - * This source code and/or documentation ("Licensed Deliverables") are - * subject to NVIDIA intellectual property rights under U.S. and - * international Copyright laws. - * - * These Licensed Deliverables contained herein is PROPRIETARY and - * CONFIDENTIAL to NVIDIA and is being provided under the terms and - * conditions of a form of NVIDIA software license agreement by and - * between NVIDIA and Licensee ("License Agreement") or electronically - * accepted by Licensee. Notwithstanding any terms or conditions to - * the contrary in the License Agreement, reproduction or disclosure - * of the Licensed Deliverables to any third party without the express - * written consent of NVIDIA is prohibited. - * - * NOTWITHSTANDING ANY TERMS OR CONDITIONS TO THE CONTRARY IN THE - * LICENSE AGREEMENT, NVIDIA MAKES NO REPRESENTATION ABOUT THE - * SUITABILITY OF THESE LICENSED DELIVERABLES FOR ANY PURPOSE. IT IS - * PROVIDED "AS IS" WITHOUT EXPRESS OR IMPLIED WARRANTY OF ANY KIND. - * NVIDIA DISCLAIMS ALL WARRANTIES WITH REGARD TO THESE LICENSED - * DELIVERABLES, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY, - * NONINFRINGEMENT, AND FITNESS FOR A PARTICULAR PURPOSE. - * NOTWITHSTANDING ANY TERMS OR CONDITIONS TO THE CONTRARY IN THE - * LICENSE AGREEMENT, IN NO EVENT SHALL NVIDIA BE LIABLE FOR ANY - * SPECIAL, INDIRECT, INCIDENTAL, OR CONSEQUENTIAL DAMAGES, OR ANY - * DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, - * WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS - * ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE - * OF THESE LICENSED DELIVERABLES. - * - * U.S. Government End Users. These Licensed Deliverables are a - * "commercial item" as that term is defined at 48 C.F.R. 2.101 (OCT - * 1995), consisting of "commercial computer software" and "commercial - * computer software documentation" as such terms are used in 48 - * C.F.R. 12.212 (SEPT 1995) and is provided to the U.S. Government - * only as a commercial end item. Consistent with 48 C.F.R.12.212 and - * 48 C.F.R. 227.7202-1 through 227.7202-4 (JUNE 1995), all - * U.S. Government End Users acquire the Licensed Deliverables with - * only those rights set forth herein. - * - * Any use of the Licensed Deliverables in individual and commercial - * software must include, in the user documentation and internal - * comments to the code, the above Disclaimer and U.S. Government End - * Users Notice. - */ - -// Host functions for converting between FP32 and FP16 formats -// Paulius Micikevicius (pauliusm@nvidia.com) - -half1 cpu_float2half_rn(float f) -{ - half1 ret; - - union { - float f; - unsigned u; - } un; - - un.f = f; - - unsigned x = un.u; - unsigned u = (x & 0x7fffffff), remainder, shift, lsb, lsb_s1, lsb_m1; - unsigned sign, exponent, mantissa; - - // Get rid of +NaN/-NaN case first. - if (u > 0x7f800000) { - ret.x = 0x7fffU; - return ret; - } - - sign = ((x >> 16) & 0x8000); - - // Get rid of +Inf/-Inf, +0/-0. - if (u > 0x477fefff) { - ret.x = sign | 0x7c00U; - return ret; - } - if (u < 0x33000001) { - ret.x = (sign | 0x0000); - return ret; - } - - exponent = ((u >> 23) & 0xff); - mantissa = (u & 0x7fffff); - - if (exponent > 0x70) { - shift = 13; - exponent -= 0x70; - } else { - shift = 0x7e - exponent; - exponent = 0; - mantissa |= 0x800000; - } - lsb = (1 << shift); - lsb_s1 = (lsb >> 1); - lsb_m1 = (lsb - 1); - - // Round to nearest even. - remainder = (mantissa & lsb_m1); - mantissa >>= shift; - if (remainder > lsb_s1 || (remainder == lsb_s1 && (mantissa & 0x1))) { - ++mantissa; - if (!(mantissa & 0x3ff)) { - ++exponent; - mantissa = 0; - } - } - - ret.x = (sign | (exponent << 10) | mantissa); - - return ret; -} - - -float cpu_half2float(half1 h) -{ - unsigned sign = ((h.x >> 15) & 1); - unsigned exponent = ((h.x >> 10) & 0x1f); - unsigned mantissa = ((h.x & 0x3ff) << 13); - - if (exponent == 0x1f) { /* NaN or Inf */ - mantissa = (mantissa ? (sign = 0, 0x7fffff) : 0); - exponent = 0xff; - } else if (!exponent) { /* Denorm or Zero */ - if (mantissa) { - unsigned int msb; - exponent = 0x71; - do { - msb = (mantissa & 0x400000); - mantissa <<= 1; /* normalize */ - --exponent; - } while (!msb); - mantissa &= 0x7fffff; /* 1.mantissa is implicit */ - } - } else { - exponent += 0x70; - } - - union { - int i; - float f; - } un; - - un.i = ((sign << 31) | (exponent << 23) | mantissa); - - return un.f; -} - -} } // namespace diff --git a/faiss/gpu/utils/nvidia/fp16_emu.cuh b/faiss/gpu/utils/nvidia/fp16_emu.cuh deleted file mode 100644 index cea1e56ee..000000000 --- a/faiss/gpu/utils/nvidia/fp16_emu.cuh +++ /dev/null @@ -1,111 +0,0 @@ -// from Nvidia cuDNN library samples; modified to compile within faiss - -#pragma once - -namespace faiss { namespace gpu { - -/* - * Copyright 1993-2014 NVIDIA Corporation. All rights reserved. - * - * NOTICE TO LICENSEE: - * - * This source code and/or documentation ("Licensed Deliverables") are - * subject to NVIDIA intellectual property rights under U.S. and - * international Copyright laws. - * - * These Licensed Deliverables contained herein is PROPRIETARY and - * CONFIDENTIAL to NVIDIA and is being provided under the terms and - * conditions of a form of NVIDIA software license agreement by and - * between NVIDIA and Licensee ("License Agreement") or electronically - * accepted by Licensee. Notwithstanding any terms or conditions to - * the contrary in the License Agreement, reproduction or disclosure - * of the Licensed Deliverables to any third party without the express - * written consent of NVIDIA is prohibited. - * - * NOTWITHSTANDING ANY TERMS OR CONDITIONS TO THE CONTRARY IN THE - * LICENSE AGREEMENT, NVIDIA MAKES NO REPRESENTATION ABOUT THE - * SUITABILITY OF THESE LICENSED DELIVERABLES FOR ANY PURPOSE. IT IS - * PROVIDED "AS IS" WITHOUT EXPRESS OR IMPLIED WARRANTY OF ANY KIND. - * NVIDIA DISCLAIMS ALL WARRANTIES WITH REGARD TO THESE LICENSED - * DELIVERABLES, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY, - * NONINFRINGEMENT, AND FITNESS FOR A PARTICULAR PURPOSE. - * NOTWITHSTANDING ANY TERMS OR CONDITIONS TO THE CONTRARY IN THE - * LICENSE AGREEMENT, IN NO EVENT SHALL NVIDIA BE LIABLE FOR ANY - * SPECIAL, INDIRECT, INCIDENTAL, OR CONSEQUENTIAL DAMAGES, OR ANY - * DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, - * WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS - * ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE - * OF THESE LICENSED DELIVERABLES. - * - * U.S. Government End Users. These Licensed Deliverables are a - * "commercial item" as that term is defined at 48 C.F.R. 2.101 (OCT - * 1995), consisting of "commercial computer software" and "commercial - * computer software documentation" as such terms are used in 48 - * C.F.R. 12.212 (SEPT 1995) and is provided to the U.S. Government - * only as a commercial end item. Consistent with 48 C.F.R.12.212 and - * 48 C.F.R. 227.7202-1 through 227.7202-4 (JUNE 1995), all - * U.S. Government End Users acquire the Licensed Deliverables with - * only those rights set forth herein. - * - * Any use of the Licensed Deliverables in individual and commercial - * software must include, in the user documentation and internal - * comments to the code, the above Disclaimer and U.S. Government End - * Users Notice. - */ - -// Conversion from/to 16-bit floating point (half-precision). - -#define HLF_EPSILON 4.887581E-04 - -typedef struct __align__(2) { - unsigned short x; -} half1; - -half1 cpu_float2half_rn(float f); - -float cpu_half2float(half1 h); - -static __inline__ __device__ __host__ half1 habs(half1 h) -{ - h.x &= 0x7fffU; - return h; -} - -static __inline__ __device__ __host__ half1 hneg(half1 h) -{ - h.x ^= 0x8000U; - return h; -} - -static __inline__ __device__ __host__ int ishnan(half1 h) -{ - // When input is NaN, exponent is all ones and mantissa is non-zero. - return (h.x & 0x7c00U) == 0x7c00U && (h.x & 0x03ffU) != 0; -} - -static __inline__ __device__ __host__ int ishinf(half1 h) -{ - // When input is +/- inf, exponent is all ones and mantissa is zero. - return (h.x & 0x7c00U) == 0x7c00U && (h.x & 0x03ffU) == 0; -} - -static __inline__ __device__ __host__ int ishequ(half1 x, half1 y) -{ - return ishnan(x) == 0 && ishnan(y) == 0 && x.x == y.x; -} - -static __inline__ __device__ __host__ half1 hzero() -{ - half1 ret; - ret.x = 0x0000U; - return ret; -} - -static __inline__ __device__ __host__ half1 hone() -{ - half1 ret; - ret.x = 0x3c00U; - return ret; -} - -} } // namespace