Workaround for vshl/vshr on aarch64 GCC (#1882)

Summary:
related: https://github.com/facebookresearch/faiss/issues/1815,  https://github.com/facebookresearch/faiss/issues/1880

The ARM NEON `vshl` / `vshr` intrinsics require an immediate (compile-time constant) value as the shift parameter.
However, GCC's implementations of those intrinsics also accept a runtime value.
The current faiss implementation depends on this, so compilers that enforce the requirement, such as Clang, can't build faiss for aarch64.
This PR fixes the issue; with it applied, faiss can be built with Clang for aarch64 machines such as the M1 Mac.

Pull Request resolved: https://github.com/facebookresearch/faiss/pull/1882

Reviewed By: beauby

Differential Revision: D28465563

Pulled By: mdouze

fbshipit-source-id: e431dfb3b27c9728072f50b4bf9445a3f4a5ac43
This commit is contained in:
Y.Imaizumi 2021-05-20 14:54:36 -07:00 committed by Facebook GitHub Bot
parent ef33daae92
commit e52f5d81f8

View File

@ -181,6 +181,16 @@ static inline uint32_t cmp_xe32(
return d0_mask | static_cast<uint32_t>(d1_mask) << 16;
}
// Left-shifts every 16-bit lane of `vec` by the compile-time constant `Shift`.
//
// The shift amount is a template parameter because vshlq_n_u16 needs an
// immediate operand; a runtime value is rejected by compilers such as Clang.
template <std::uint8_t Shift>
static inline uint16x8_t vshlq(uint16x8_t vec) {
    // vshlq_n_u16 only accepts immediates in [0, 15]; fail early with a
    // readable message instead of an intrinsic-internal diagnostic.
    static_assert(Shift <= 15, "vshlq: Shift must be in the range [0, 15]");
    return vshlq_n_u16(vec, Shift);
}
// Right-shifts (logical) every 16-bit lane of `vec` by the compile-time
// constant `Shift`.
//
// The shift amount is a template parameter because vshrq_n_u16 needs an
// immediate operand; a runtime value is rejected by compilers such as Clang.
template <std::uint8_t Shift>
static inline uint16x8_t vshrq(uint16x8_t vec) {
    // vshrq_n_u16 only accepts immediates in [1, 16] (a shift of 0 is not
    // encodable); fail early with a readable message.
    static_assert(
            Shift >= 1 && Shift <= 16,
            "vshrq: Shift must be in the range [1, 16]");
    return vshrq_n_u16(vec, Shift);
}
} // namespace simdlib
} // namespace detail
@ -252,14 +262,112 @@ struct simd16uint16 {
// Right shift of the 16-bit lanes by `shift`.
//
// `shift` may be a runtime value in [0, 15]. The underlying intrinsic
// (vshrq_n_u16) requires an immediate operand, so the value is dispatched
// through a switch to the matching detail::simdlib::vshrq<N> instantiation.
// A shift of 0 returns a copy of *this unchanged; any other value outside
// [1, 15] is reported through FAISS_THROW_FMT.
simd16uint16 operator>>(const int shift) const {
    switch (shift) {
        case 0:
            return *this;
        case 1:
            return simd16uint16{detail::simdlib::unary_func(
                    data, detail::simdlib::vshrq<1>)};
        case 2:
            return simd16uint16{detail::simdlib::unary_func(
                    data, detail::simdlib::vshrq<2>)};
        case 3:
            return simd16uint16{detail::simdlib::unary_func(
                    data, detail::simdlib::vshrq<3>)};
        case 4:
            return simd16uint16{detail::simdlib::unary_func(
                    data, detail::simdlib::vshrq<4>)};
        case 5:
            return simd16uint16{detail::simdlib::unary_func(
                    data, detail::simdlib::vshrq<5>)};
        case 6:
            return simd16uint16{detail::simdlib::unary_func(
                    data, detail::simdlib::vshrq<6>)};
        case 7:
            return simd16uint16{detail::simdlib::unary_func(
                    data, detail::simdlib::vshrq<7>)};
        case 8:
            return simd16uint16{detail::simdlib::unary_func(
                    data, detail::simdlib::vshrq<8>)};
        case 9:
            return simd16uint16{detail::simdlib::unary_func(
                    data, detail::simdlib::vshrq<9>)};
        case 10:
            return simd16uint16{detail::simdlib::unary_func(
                    data, detail::simdlib::vshrq<10>)};
        case 11:
            return simd16uint16{detail::simdlib::unary_func(
                    data, detail::simdlib::vshrq<11>)};
        case 12:
            return simd16uint16{detail::simdlib::unary_func(
                    data, detail::simdlib::vshrq<12>)};
        case 13:
            return simd16uint16{detail::simdlib::unary_func(
                    data, detail::simdlib::vshrq<13>)};
        case 14:
            return simd16uint16{detail::simdlib::unary_func(
                    data, detail::simdlib::vshrq<14>)};
        case 15:
            return simd16uint16{detail::simdlib::unary_func(
                    data, detail::simdlib::vshrq<15>)};
        default:
            FAISS_THROW_FMT("Invalid shift %d", shift);
    }
}
// Left shift of the 16-bit lanes by `shift`.
//
// `shift` may be a runtime value in [0, 15]. The underlying intrinsic
// (vshlq_n_u16) requires an immediate operand, so the value is dispatched
// through a switch to the matching detail::simdlib::vshlq<N> instantiation.
// A shift of 0 returns a copy of *this unchanged; any other value outside
// [1, 15] is reported through FAISS_THROW_FMT.
simd16uint16 operator<<(const int shift) const {
    switch (shift) {
        case 0:
            return *this;
        case 1:
            return simd16uint16{detail::simdlib::unary_func(
                    data, detail::simdlib::vshlq<1>)};
        case 2:
            return simd16uint16{detail::simdlib::unary_func(
                    data, detail::simdlib::vshlq<2>)};
        case 3:
            return simd16uint16{detail::simdlib::unary_func(
                    data, detail::simdlib::vshlq<3>)};
        case 4:
            return simd16uint16{detail::simdlib::unary_func(
                    data, detail::simdlib::vshlq<4>)};
        case 5:
            return simd16uint16{detail::simdlib::unary_func(
                    data, detail::simdlib::vshlq<5>)};
        case 6:
            return simd16uint16{detail::simdlib::unary_func(
                    data, detail::simdlib::vshlq<6>)};
        case 7:
            return simd16uint16{detail::simdlib::unary_func(
                    data, detail::simdlib::vshlq<7>)};
        case 8:
            return simd16uint16{detail::simdlib::unary_func(
                    data, detail::simdlib::vshlq<8>)};
        case 9:
            return simd16uint16{detail::simdlib::unary_func(
                    data, detail::simdlib::vshlq<9>)};
        case 10:
            return simd16uint16{detail::simdlib::unary_func(
                    data, detail::simdlib::vshlq<10>)};
        case 11:
            return simd16uint16{detail::simdlib::unary_func(
                    data, detail::simdlib::vshlq<11>)};
        case 12:
            return simd16uint16{detail::simdlib::unary_func(
                    data, detail::simdlib::vshlq<12>)};
        case 13:
            return simd16uint16{detail::simdlib::unary_func(
                    data, detail::simdlib::vshlq<13>)};
        case 14:
            return simd16uint16{detail::simdlib::unary_func(
                    data, detail::simdlib::vshlq<14>)};
        case 15:
            return simd16uint16{detail::simdlib::unary_func(
                    data, detail::simdlib::vshlq<15>)};
        default:
            FAISS_THROW_FMT("Invalid shift %d", shift);
    }
}
simd16uint16 operator+=(const simd16uint16& other) {