Skip to content

Commit

Permalink
Disable the NEON optimisations on gcc < 8 (#3869)
Browse files Browse the repository at this point in the history
Summary:
GCC 7 doesn't support all the necessary NEON intrinsics, which is really a shame. This means that on aarch64, GCC 7 cannot compile faiss with the NEON intrinsics enabled, so we should avoid using them with that compiler.

This is similar to #3860: both fix build issues on GCC 7, which I need to build with. This one is a bit uglier, since GCC 7 does support NEON, just not all of the intrinsics.

Pull Request resolved: #3869

Reviewed By: asadoughi

Differential Revision: D63081962

Pulled By: junjieqi

fbshipit-source-id: 69827cd447dd405b3ef70d651996f9ad00b6213e
  • Loading branch information
JAicewizard authored and facebook-github-bot committed Sep 26, 2024
1 parent d8aec60 commit 1ee7561
Showing 1 changed file with 25 additions and 16 deletions.
41 changes: 25 additions & 16 deletions faiss/impl/ScalarQuantizer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,15 @@ namespace faiss {
#endif
#endif

// Enable the NEON code paths of the scalar quantizer on aarch64, except when
// building with genuine GCC older than 8, which does not support all of the
// NEON intrinsics this file needs.
// Clang also defines __GNUC__ (4 by default) for GCC compatibility, so it is
// excluded explicitly; otherwise Clang aarch64 builds would silently lose the
// NEON optimizations and emit a misleading warning.
#if defined(__aarch64__)
#if defined(__GNUC__) && !defined(__clang__) && __GNUC__ < 8
#warning \
"Cannot enable NEON optimizations in scalar quantizer if the compiler is GCC<8"
#else
#define USE_NEON
#endif
#endif

namespace {

typedef ScalarQuantizer::QuantizerType QuantizerType;
Expand Down Expand Up @@ -105,7 +114,7 @@ struct Codec8bit {
}
#endif

#ifdef __aarch64__
#ifdef USE_NEON
static FAISS_ALWAYS_INLINE float32x4x2_t
decode_8_components(const uint8_t* code, int i) {
float32_t result[8] = {};
Expand Down Expand Up @@ -175,7 +184,7 @@ struct Codec4bit {
}
#endif

#ifdef __aarch64__
#ifdef USE_NEON
static FAISS_ALWAYS_INLINE float32x4x2_t
decode_8_components(const uint8_t* code, int i) {
float32_t result[8] = {};
Expand Down Expand Up @@ -336,7 +345,7 @@ struct Codec6bit {

#endif

#ifdef __aarch64__
#ifdef USE_NEON
static FAISS_ALWAYS_INLINE float32x4x2_t
decode_8_components(const uint8_t* code, int i) {
float32_t result[8] = {};
Expand Down Expand Up @@ -437,7 +446,7 @@ struct QuantizerTemplate<Codec, QuantizerTemplateScaling::UNIFORM, 8>

#endif

#ifdef __aarch64__
#ifdef USE_NEON

template <class Codec>
struct QuantizerTemplate<Codec, QuantizerTemplateScaling::UNIFORM, 8>
Expand Down Expand Up @@ -546,7 +555,7 @@ struct QuantizerTemplate<Codec, QuantizerTemplateScaling::NON_UNIFORM, 8>

#endif

#ifdef __aarch64__
#ifdef USE_NEON

template <class Codec>
struct QuantizerTemplate<Codec, QuantizerTemplateScaling::NON_UNIFORM, 8>
Expand Down Expand Up @@ -634,7 +643,7 @@ struct QuantizerFP16<8> : QuantizerFP16<1> {

#endif

#ifdef __aarch64__
#ifdef USE_NEON

template <>
struct QuantizerFP16<8> : QuantizerFP16<1> {
Expand Down Expand Up @@ -714,7 +723,7 @@ struct QuantizerBF16<8> : QuantizerBF16<1> {

#endif

#ifdef __aarch64__
#ifdef USE_NEON

template <>
struct QuantizerBF16<8> : QuantizerBF16<1> {
Expand Down Expand Up @@ -795,7 +804,7 @@ struct Quantizer8bitDirect<8> : Quantizer8bitDirect<1> {

#endif

#ifdef __aarch64__
#ifdef USE_NEON

template <>
struct Quantizer8bitDirect<8> : Quantizer8bitDirect<1> {
Expand Down Expand Up @@ -884,7 +893,7 @@ struct Quantizer8bitDirectSigned<8> : Quantizer8bitDirectSigned<1> {

#endif

#ifdef __aarch64__
#ifdef USE_NEON

template <>
struct Quantizer8bitDirectSigned<8> : Quantizer8bitDirectSigned<1> {
Expand Down Expand Up @@ -1242,7 +1251,7 @@ struct SimilarityL2<8> {

#endif

#ifdef __aarch64__
#ifdef USE_NEON
template <>
struct SimilarityL2<8> {
static constexpr int simdwidth = 8;
Expand Down Expand Up @@ -1402,7 +1411,7 @@ struct SimilarityIP<8> {
};
#endif

#ifdef __aarch64__
#ifdef USE_NEON

template <>
struct SimilarityIP<8> {
Expand Down Expand Up @@ -1599,7 +1608,7 @@ struct DCTemplate<Quantizer, Similarity, 8> : SQDistanceComputer {

#endif

#ifdef __aarch64__
#ifdef USE_NEON

template <class Quantizer, class Similarity>
struct DCTemplate<Quantizer, Similarity, 8> : SQDistanceComputer {
Expand Down Expand Up @@ -1815,7 +1824,7 @@ struct DistanceComputerByte<Similarity, 8> : SQDistanceComputer {

#endif

#ifdef __aarch64__
#ifdef USE_NEON

template <class Similarity>
struct DistanceComputerByte<Similarity, 8> : SQDistanceComputer {
Expand Down Expand Up @@ -2041,7 +2050,7 @@ ScalarQuantizer::SQuantizer* ScalarQuantizer::select_quantizer() const {
if (d % 16 == 0) {
return select_quantizer_1<16>(qtype, d, trained);
} else
#elif defined(USE_F16C) || defined(__aarch64__)
#elif defined(USE_F16C) || defined(USE_NEON)
if (d % 8 == 0) {
return select_quantizer_1<8>(qtype, d, trained);
} else
Expand Down Expand Up @@ -2082,7 +2091,7 @@ SQDistanceComputer* ScalarQuantizer::get_distance_computer(
qtype, d, trained);
}
} else
#elif defined(USE_F16C) || defined(__aarch64__)
#elif defined(USE_F16C) || defined(USE_NEON)
if (d % 8 == 0) {
if (metric == METRIC_L2) {
return select_distance_computer<SimilarityL2<8>>(qtype, d, trained);
Expand Down Expand Up @@ -2457,7 +2466,7 @@ InvertedListScanner* ScalarQuantizer::select_InvertedListScanner(
return sel0_InvertedListScanner<16>(
mt, this, quantizer, store_pairs, sel, by_residual);
} else
#elif defined(USE_F16C) || defined(__aarch64__)
#elif defined(USE_F16C) || defined(USE_NEON)
if (d % 8 == 0) {
return sel0_InvertedListScanner<8>(
mt, this, quantizer, store_pairs, sel, by_residual);
Expand Down

0 comments on commit 1ee7561

Please sign in to comment.