From 2fa59931a6d24ac357157b46533c0f4dcd297492 Mon Sep 17 00:00:00 2001 From: Aditya Vidyadhar Kamath Date: Thu, 4 Apr 2024 07:37:35 -0500 Subject: [PATCH 01/15] Fix the endianness issue in AIX while running the benchmark. --- contrib/vecs_io.py | 3 +++ faiss/impl/pq4_fast_scan.cpp | 10 ++++++++++ faiss/python/CMakeLists.txt | 13 +++++++++++++ 3 files changed, 26 insertions(+) diff --git a/contrib/vecs_io.py b/contrib/vecs_io.py index ea75d5f94d..cd16a2b73d 100644 --- a/contrib/vecs_io.py +++ b/contrib/vecs_io.py @@ -3,6 +3,7 @@ # This source code is licensed under the MIT license found in the # LICENSE file in the root directory of this source tree. +import sys import numpy as np """ @@ -13,6 +14,8 @@ def ivecs_read(fname): a = np.fromfile(fname, dtype='int32') + if sys.byteorder == 'big': + a.byteswap(inplace=True) d = a[0] return a.reshape(-1, d + 1)[:, 1:].copy() diff --git a/faiss/impl/pq4_fast_scan.cpp b/faiss/impl/pq4_fast_scan.cpp index 6173ecef47..29e685d229 100644 --- a/faiss/impl/pq4_fast_scan.cpp +++ b/faiss/impl/pq4_fast_scan.cpp @@ -58,8 +58,13 @@ void pq4_pack_codes( return; } memset(blocks, 0, nb * nsq / 2); +#if (__BYTE_ORDER__ == __ORDER_BIG_ENDIAN__) + const uint8_t perm0[16] = { + 8, 0, 9, 1, 10, 2, 11, 3, 12, 4, 13, 5, 14, 6, 15, 7}; +#else const uint8_t perm0[16] = { 0, 8, 1, 9, 2, 10, 3, 11, 4, 12, 5, 13, 6, 14, 7, 15}; +#endif uint8_t* codes2 = blocks; for (size_t i0 = 0; i0 < nb; i0 += bbs) { @@ -93,8 +98,13 @@ void pq4_pack_codes_range( size_t bbs, size_t nsq, uint8_t* blocks) { +#if (__BYTE_ORDER__ == __ORDER_BIG_ENDIAN__) + const uint8_t perm0[16] = { + 8, 0, 9, 1, 10, 2, 11, 3, 12, 4, 13, 5, 14, 6, 15, 7}; +#else const uint8_t perm0[16] = { 0, 8, 1, 9, 2, 10, 3, 11, 4, 12, 5, 13, 6, 14, 7, 15}; +#endif // range of affected blocks size_t block0 = i0 / bbs; diff --git a/faiss/python/CMakeLists.txt b/faiss/python/CMakeLists.txt index 8bca710f5f..dee8c7762e 100644 --- a/faiss/python/CMakeLists.txt +++ b/faiss/python/CMakeLists.txt @@ -67,11 +67,20 @@ else() find_package(faiss REQUIRED) endif() +if (${CMAKE_SYSTEM_NAME} MATCHES "AIX") +swig_add_library(swigfaiss + TYPE MODULE + LANGUAGE python + SOURCES swigfaiss.swig +) +else () swig_add_library(swigfaiss TYPE SHARED LANGUAGE python SOURCES swigfaiss.swig ) +endif() + set_property(TARGET swigfaiss PROPERTY SWIG_COMPILE_OPTIONS -doxygen) set_property(SOURCE swigfaiss_avx2.swig @@ -160,6 +169,10 @@ set_property(TARGET faiss_python_callbacks PROPERTY POSITION_INDEPENDENT_CODE ON ) +if (${CMAKE_SYSTEM_NAME} MATCHES "AIX") +target_link_libraries(faiss_python_callbacks PRIVATE faiss) +endif() + # Hack so that python_callbacks.h can be included as # `#include `. target_include_directories(faiss_python_callbacks PRIVATE ${PROJECT_SOURCE_DIR}/../..) From a86c2fd186b68b5b1bc227cdf1ad1e5a6de1bdbc Mon Sep 17 00:00:00 2001 From: Aditya Vidyadhar Kamath Date: Thu, 4 Apr 2024 09:27:51 -0500 Subject: [PATCH 02/15] Attempt to Fix Windows Build break --- faiss/impl/pq4_fast_scan.cpp | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/faiss/impl/pq4_fast_scan.cpp b/faiss/impl/pq4_fast_scan.cpp index 29e685d229..0eba001161 100644 --- a/faiss/impl/pq4_fast_scan.cpp +++ b/faiss/impl/pq4_fast_scan.cpp @@ -58,7 +58,8 @@ void pq4_pack_codes( return; } memset(blocks, 0, nb * nsq / 2); -#if (__BYTE_ORDER__ == __ORDER_BIG_ENDIAN__) +#if !defined(_MSC_VER) || \ + (defined(__BYTE_ORDER__) && (__BYTE_ORDER__ == __ORDER_BIG_ENDIAN__)) const uint8_t perm0[16] = { 8, 0, 9, 1, 10, 2, 11, 3, 12, 4, 13, 5, 14, 6, 15, 7}; #else @@ -98,7 +99,8 @@ void pq4_pack_codes_range( size_t bbs, size_t nsq, uint8_t* blocks) { -#if (__BYTE_ORDER__ == __ORDER_BIG_ENDIAN__) +#if !defined(_MSC_VER) || \ + (defined(__BYTE_ORDER__) && (__BYTE_ORDER__ == __ORDER_BIG_ENDIAN__)) const uint8_t perm0[16] = { 8, 0, 9, 1, 10, 2, 11, 3, 12, 4, 13, 5, 14, 6, 15, 7}; #else From e0412c5f7d3947e5f0609ab3920106c1c3c9f9ff Mon Sep 17 00:00:00 2001 From: Aditya Vidyadhar Kamath Date: Thu, 4 Apr 2024 10:51:59 -0500 Subject: [PATCH 03/15] Change or to and --- faiss/impl/pq4_fast_scan.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/faiss/impl/pq4_fast_scan.cpp b/faiss/impl/pq4_fast_scan.cpp index 0eba001161..96f12b640e 100644 --- a/faiss/impl/pq4_fast_scan.cpp +++ b/faiss/impl/pq4_fast_scan.cpp @@ -58,7 +58,7 @@ void pq4_pack_codes( return; } memset(blocks, 0, nb * nsq / 2); -#if !defined(_MSC_VER) || \ +#if !defined(_MSC_VER) && \ (defined(__BYTE_ORDER__) && (__BYTE_ORDER__ == __ORDER_BIG_ENDIAN__)) const uint8_t perm0[16] = { 8, 0, 9, 1, 10, 2, 11, 3, 12, 4, 13, 5, 14, 6, 15, 7}; @@ -99,7 +99,7 @@ void pq4_pack_codes_range( size_t bbs, size_t nsq, uint8_t* blocks) { -#if !defined(_MSC_VER) || \ +#if !defined(_MSC_VER) && \ (defined(__BYTE_ORDER__) && (__BYTE_ORDER__ == __ORDER_BIG_ENDIAN__)) const uint8_t perm0[16] = { 8, 0, 9, 1, 10, 2, 11, 3, 12, 4, 13, 5, 14, 6, 15, 7}; From 0561f7d748e45948fae228a6671d537994cff51e Mon Sep 17 00:00:00 2001 From: Aditya Vidyadhar Kamath Date: Thu, 4 Apr 2024 11:26:52 -0500 Subject: [PATCH 04/15] Fix format issue --- faiss/impl/pq4_fast_scan.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/faiss/impl/pq4_fast_scan.cpp b/faiss/impl/pq4_fast_scan.cpp index 96f12b640e..e961eb4cff 100644 --- a/faiss/impl/pq4_fast_scan.cpp +++ b/faiss/impl/pq4_fast_scan.cpp @@ -59,7 +59,7 @@ void pq4_pack_codes( } memset(blocks, 0, nb * nsq / 2); #if !defined(_MSC_VER) && \ - (defined(__BYTE_ORDER__) && (__BYTE_ORDER__ == __ORDER_BIG_ENDIAN__)) + (defined(__BYTE_ORDER__) && (__BYTE_ORDER__ == __ORDER_BIG_ENDIAN__)) const uint8_t perm0[16] = { 8, 0, 9, 1, 10, 2, 11, 3, 12, 4, 13, 5, 14, 6, 15, 7}; #else @@ -100,7 +100,7 @@ void pq4_pack_codes_range( size_t nsq, uint8_t* blocks) { #if !defined(_MSC_VER) && \ - (defined(__BYTE_ORDER__) && (__BYTE_ORDER__ == __ORDER_BIG_ENDIAN__)) + (defined(__BYTE_ORDER__) && (__BYTE_ORDER__ == __ORDER_BIG_ENDIAN__)) const uint8_t perm0[16] = { 8, 0, 9, 1, 10, 2, 11, 3, 12, 4, 13, 5, 14, 6, 15, 7}; #else From aad8301ecd6803e124431f4203ac8c70cb1c0b9c Mon Sep 17 00:00:00 2001 From: Aditya Vidyadhar Kamath Date: Mon, 8 Apr 2024 02:21:51 -0500 Subject: [PATCH 05/15] Move macros to faiss/impl/platform_macros.h to simplify logic --- faiss/impl/platform_macros.h | 8 ++++++++ faiss/impl/pq4_fast_scan.cpp | 8 ++++---- 2 files changed, 12 insertions(+), 4 deletions(-) diff --git a/faiss/impl/platform_macros.h b/faiss/impl/platform_macros.h index 2aecc51222..57e2343d1d 100644 --- a/faiss/impl/platform_macros.h +++ b/faiss/impl/platform_macros.h @@ -165,3 +165,11 @@ inline int __builtin_clzll(uint64_t x) { #endif // clang-format on + +/******************************************************* + * BIGENDIAN specific macros + *******************************************************/ +#if !defined(_MSC_VER) && \ + (defined(__BYTE_ORDER__) && (__BYTE_ORDER__ == __ORDER_BIG_ENDIAN__)) +#define FAISS_BIG_ENDIAN 1 +#endif diff --git a/faiss/impl/pq4_fast_scan.cpp b/faiss/impl/pq4_fast_scan.cpp index e961eb4cff..41a04d01ed 100644 --- a/faiss/impl/pq4_fast_scan.cpp +++ b/faiss/impl/pq4_fast_scan.cpp @@ -8,6 +8,7 @@ #include #include #include +#include #include @@ -50,6 +51,7 @@ void pq4_pack_codes( size_t bbs, size_t nsq, uint8_t* blocks) { + FAISS_THROW_IF_NOT(bbs % 32 == 0); FAISS_THROW_IF_NOT(nb % bbs == 0); FAISS_THROW_IF_NOT(nsq % 2 == 0); @@ -58,8 +60,7 @@ void pq4_pack_codes( return; } memset(blocks, 0, nb * nsq / 2); -#if !defined(_MSC_VER) && \ - (defined(__BYTE_ORDER__) && (__BYTE_ORDER__ == __ORDER_BIG_ENDIAN__)) +#ifdef FAISS_BIG_ENDIAN const uint8_t perm0[16] = { 8, 0, 9, 1, 10, 2, 11, 3, 12, 4, 13, 5, 14, 6, 15, 7}; #else @@ -99,8 +100,7 @@ void pq4_pack_codes_range( size_t bbs, size_t nsq, uint8_t* blocks) { -#if !defined(_MSC_VER) && \ - (defined(__BYTE_ORDER__) && (__BYTE_ORDER__ == __ORDER_BIG_ENDIAN__)) +#ifdef FAISS_BIG_ENDIAN const uint8_t perm0[16] = { 8, 0, 9, 1, 10, 2, 11, 3, 12, 4, 13, 5, 14, 6, 15, 7}; #else From be1a750d135a99333940f116a4e92ea0d4ee28e0 Mon Sep 17 00:00:00 2001 From: Aditya Vidyadhar Kamath Date: Mon, 8 Apr 2024 02:25:53 -0500 Subject: [PATCH 06/15] Fix Format issues --- faiss/impl/pq4_fast_scan.cpp | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/faiss/impl/pq4_fast_scan.cpp b/faiss/impl/pq4_fast_scan.cpp index 41a04d01ed..127646e0eb 100644 --- a/faiss/impl/pq4_fast_scan.cpp +++ b/faiss/impl/pq4_fast_scan.cpp @@ -6,9 +6,9 @@ */ #include +#include #include #include -#include #include @@ -51,7 +51,6 @@ void pq4_pack_codes( size_t bbs, size_t nsq, uint8_t* blocks) { - FAISS_THROW_IF_NOT(bbs % 32 == 0); FAISS_THROW_IF_NOT(nb % bbs == 0); FAISS_THROW_IF_NOT(nsq % 2 == 0); From db1a5ebd21cf73e625dafd80a7706d6bfd10b85b Mon Sep 17 00:00:00 2001 From: Aditya Vidyadhar Kamath Date: Mon, 8 Apr 2024 09:08:19 -0500 Subject: [PATCH 07/15] Merge test case fix due to endianness in AIX --- faiss/cppcontrib/detail/UintReader.h | 127 ++++++++++++++++++++---- faiss/cppcontrib/sa_decode/Level2-inl.h | 113 +++++++++++++++------ 2 files changed, 190 insertions(+), 50 deletions(-) diff --git a/faiss/cppcontrib/detail/UintReader.h b/faiss/cppcontrib/detail/UintReader.h index 81e600f410..a6439f6bb1 100644 --- a/faiss/cppcontrib/detail/UintReader.h +++ b/faiss/cppcontrib/detail/UintReader.h @@ -8,6 +8,16 @@ #pragma once #include +#include + +#ifdef FAISS_BIG_ENDIAN +#define Swap2Bytes(val) ((((val) >> 8) & 0x00FF) | (((val) << 8) & 0xFF00)) + +#define Swap4Bytes(val) \ + ((((val) >> 24) & 0x000000FF) | (((val) >> 8) & 0x0000FF00) | \ + (((val) << 8) & 0x00FF0000) | (((val) << 24) & 0xFF000000)) + +#endif namespace faiss { namespace cppcontrib { @@ -31,7 +41,11 @@ struct Uint8Reader { if (N_ELEMENTS > CPOS + 3) { const uint32_t code32 = *reinterpret_cast( codes + ELEMENT_TO_READ * 4); - return (code32 & 0x000000FF); +#ifdef FAISS_BIG_ENDIAN + return (code32) >> 24; +#else + return (code32 & 0x000000FF); +#endif } else { return codes[CPOS]; } @@ -40,7 +54,11 @@ struct Uint8Reader { if (N_ELEMENTS > CPOS + 2) { const uint32_t code32 = *reinterpret_cast( codes + ELEMENT_TO_READ * 4); +#ifdef FAISS_BIG_ENDIAN + return (code32 & 0x00FF0000) >> 16; +#else return (code32 & 0x0000FF00) >> 8; +#endif } else { return codes[CPOS]; } @@ -49,7 +67,11 @@ struct Uint8Reader { if (N_ELEMENTS > CPOS + 1) { const uint32_t code32 = *reinterpret_cast( codes + ELEMENT_TO_READ * 4); +#ifdef FAISS_BIG_ENDIAN + return (code32 & 0x0000FF00) >> 8; +#else return (code32 & 0x00FF0000) >> 16; +#endif } else { return codes[CPOS]; } @@ -58,7 +80,11 @@ struct Uint8Reader { if (N_ELEMENTS > CPOS) { const uint32_t code32 = *reinterpret_cast( codes + ELEMENT_TO_READ * 4); +#ifdef FAISS_BIG_ENDIAN + return (code32 & 0x000000FF); +#else return (code32) >> 24; +#endif } else { return codes[CPOS]; } @@ -87,40 +113,61 @@ struct Uint10Reader { switch (SUB_ELEMENT) { case 0: { if (N_ELEMENTS > CPOS + 2) { - const uint32_t code32 = *reinterpret_cast( + uint32_t code32 = *reinterpret_cast( codes + ELEMENT_TO_READ * 5); +#ifdef FAISS_BIG_ENDIAN + code32 = Swap4Bytes(code32); +#endif return (code32 & 0b0000001111111111); } else { - const uint16_t code16 = *reinterpret_cast( + uint16_t code16 = *reinterpret_cast( codes + ELEMENT_TO_READ * 5 + 0); +#ifdef FAISS_BIG_ENDIAN + code16 = Swap2Bytes(code16); +#endif return (code16 & 0b0000001111111111); } } case 1: { if (N_ELEMENTS > CPOS + 1) { - const uint32_t code32 = *reinterpret_cast( + uint32_t code32 = *reinterpret_cast( codes + ELEMENT_TO_READ * 5); +#ifdef FAISS_BIG_ENDIAN + code32 = Swap4Bytes(code32); +#endif return (code32 & 0b000011111111110000000000) >> 10; } else { - const uint16_t code16 = *reinterpret_cast( + uint16_t code16 = *reinterpret_cast( codes + ELEMENT_TO_READ * 5 + 1); +#ifdef FAISS_BIG_ENDIAN + code16 = Swap2Bytes(code16); +#endif return (code16 & 0b0000111111111100) >> 2; } } case 2: { if (N_ELEMENTS > CPOS) { - const uint32_t code32 = *reinterpret_cast( + uint32_t code32 = *reinterpret_cast( codes + ELEMENT_TO_READ * 5); +#ifdef FAISS_BIG_ENDIAN + code32 = Swap4Bytes(code32); +#endif return (code32 & 0b00111111111100000000000000000000) >> 20; } else { - const uint16_t code16 = *reinterpret_cast( + uint16_t code16 = *reinterpret_cast( codes + ELEMENT_TO_READ * 5 + 2); +#ifdef FAISS_BIG_ENDIAN + code16 = Swap2Bytes(code16); +#endif return (code16 & 0b0011111111110000) >> 4; } } case 3: { - const uint16_t code16 = *reinterpret_cast( + uint16_t code16 = *reinterpret_cast( codes + ELEMENT_TO_READ * 5 + 3); +#ifdef FAISS_BIG_ENDIAN + code16 = Swap2Bytes(code16); +#endif return (code16 & 0b1111111111000000) >> 6; } } @@ -147,45 +194,69 @@ struct Uint12Reader { switch (SUB_ELEMENT) { case 0: { if (N_ELEMENTS > CPOS + 2) { - const uint32_t code32 = *reinterpret_cast( + uint32_t code32 = *reinterpret_cast( codes + ELEMENT_TO_READ * 6); +#ifdef FAISS_BIG_ENDIAN + code32 = Swap4Bytes(code32); +#endif return (code32 & 0b0000111111111111); } else { - const uint16_t code16 = *reinterpret_cast( + uint16_t code16 = *reinterpret_cast( codes + ELEMENT_TO_READ * 6 + 0); +#ifdef FAISS_BIG_ENDIAN + code16 = Swap2Bytes(code16); +#endif return (code16 & 0b0000111111111111); } } case 1: { if (N_ELEMENTS > CPOS + 1) { - const uint32_t code32 = *reinterpret_cast( + uint32_t code32 = *reinterpret_cast( codes + ELEMENT_TO_READ * 6); +#ifdef FAISS_BIG_ENDIAN + code32 = Swap4Bytes(code32); +#endif return (code32 & 0b111111111111000000000000) >> 12; } else { - const uint16_t code16 = *reinterpret_cast( + uint16_t code16 = *reinterpret_cast( codes + ELEMENT_TO_READ * 6 + 1); +#ifdef FAISS_BIG_ENDIAN + code16 = Swap2Bytes(code16); +#endif return (code16 & 0b1111111111110000) >> 4; } } case 2: { if (N_ELEMENTS > CPOS + 1) { - const uint32_t code32 = *reinterpret_cast( + uint32_t code32 = *reinterpret_cast( codes + ELEMENT_TO_READ * 6 + 2); +#ifdef FAISS_BIG_ENDIAN + code32 = Swap4Bytes(code32); +#endif return (code32 & 0b000011111111111100000000) >> 8; } else { - const uint16_t code16 = *reinterpret_cast( + uint16_t code16 = *reinterpret_cast( codes + ELEMENT_TO_READ * 6 + 3); +#ifdef FAISS_BIG_ENDIAN + code16 = Swap2Bytes(code16); +#endif return (code16 & 0b0000111111111111); } } case 3: { if (N_ELEMENTS > CPOS) { - const uint32_t code32 = *reinterpret_cast( + uint32_t code32 = *reinterpret_cast( codes + ELEMENT_TO_READ * 6 + 2); +#ifdef FAISS_BIG_ENDIAN + code32 = Swap4Bytes(code32); +#endif return (code32 & 0b11111111111100000000000000000000) >> 20; } else { - const uint16_t code16 = *reinterpret_cast( + uint16_t code16 = *reinterpret_cast( codes + ELEMENT_TO_READ * 6 + 4); +#ifdef FAISS_BIG_ENDIAN + code16 = Swap2Bytes(code16); +#endif return (code16 & 0b1111111111110000) >> 4; } } @@ -208,23 +279,39 @@ struct Uint16Reader { switch (SUB_ELEMENT) { case 0: { if (N_ELEMENTS > CPOS + 1) { - const uint32_t code32 = *reinterpret_cast( + uint32_t code32 = *reinterpret_cast( codes + ELEMENT_TO_READ * 4); +#ifdef FAISS_BIG_ENDIAN + code32 = Swap4Bytes(code32); +#endif return (code32 & 0x0000FFFF); } else { - const uint16_t* const __restrict codesFp16 = + const uint16_t* const __restrict codesFp16 = reinterpret_cast(codes); - return codesFp16[CPOS]; +#ifdef FAISS_BIG_ENDIAN + uint16_t rt = codesFp16[CPOS]; + rt=Swap2Bytes(rt); + return rt; +#endif + return codesFp16[CPOS]; } } case 1: { if (N_ELEMENTS > CPOS) { - const uint32_t code32 = *reinterpret_cast( + uint32_t code32 = *reinterpret_cast( codes + ELEMENT_TO_READ * 4); +#ifdef FAISS_BIG_ENDIAN + code32 = Swap4Bytes(code32); +#endif return code32 >> 16; } else { const uint16_t* const __restrict codesFp16 = reinterpret_cast(codes); +#ifdef FAISS_BIG_ENDIAN + uint16_t rt = codesFp16[CPOS]; + rt=Swap2Bytes(rt); + return rt; +#endif return codesFp16[CPOS]; } } diff --git a/faiss/cppcontrib/sa_decode/Level2-inl.h b/faiss/cppcontrib/sa_decode/Level2-inl.h index 36355af001..7df97b6d69 100644 --- a/faiss/cppcontrib/sa_decode/Level2-inl.h +++ b/faiss/cppcontrib/sa_decode/Level2-inl.h @@ -12,6 +12,15 @@ #include #include +#include + +#ifdef FAISS_BIG_ENDIAN +#define Swap2Bytes(val) ((((val) >> 8) & 0x00FF) | (((val) << 8) & 0xFF00)) +#endif + +#ifndef FAISS_BIG_ENDIAN +#define FAISS_BIG_ENDIAN 0 +#endif namespace faiss { namespace cppcontrib { @@ -72,9 +81,14 @@ struct Index2LevelDecoder { const intptr_t coarseCentroidOffset = i % COARSE_SIZE; const intptr_t fineCentroidIdx = i / FINE_SIZE; const intptr_t fineCentroidOffset = i % FINE_SIZE; - - const intptr_t coarseCode = coarse[coarseCentroidIdx]; - const intptr_t fineCode = fine[fineCentroidIdx]; + intptr_t coarseCode, fineCode; + if (FAISS_BIG_ENDIAN && sizeof(coarse_storage_type) == 2) { + coarseCode = Swap2Bytes(coarse[coarseCentroidIdx]); + fineCode = Swap2Bytes(fine[fineCentroidIdx]); + } else { + coarseCode = coarse[coarseCentroidIdx]; + fineCode = fine[fineCentroidIdx]; + } const float* const __restrict coarsePtr = pqCoarseCentroids + (coarseCentroidIdx * COARSE_TABLE_BYTES + coarseCode) * @@ -112,9 +126,14 @@ struct Index2LevelDecoder { const intptr_t fineCentroidIdx = i / FINE_SIZE; const intptr_t fineCentroidOffset = i % FINE_SIZE; - const intptr_t coarseCode = coarse[coarseCentroidIdx]; - const intptr_t fineCode = fine[fineCentroidIdx]; - + intptr_t coarseCode, fineCode; + if (FAISS_BIG_ENDIAN && sizeof(coarse_storage_type) == 2) { + coarseCode = Swap2Bytes(coarse[coarseCentroidIdx]); + fineCode = Swap2Bytes(fine[fineCentroidIdx]); + } else { + coarseCode = coarse[coarseCentroidIdx]; + fineCode = fine[fineCentroidIdx]; + } const float* const __restrict coarsePtr = pqCoarseCentroids + (coarseCentroidIdx * COARSE_TABLE_BYTES + coarseCode) * COARSE_SIZE + @@ -162,11 +181,18 @@ struct Index2LevelDecoder { const intptr_t coarseCentroidOffset = i % COARSE_SIZE; const intptr_t fineCentroidIdx = i / FINE_SIZE; const intptr_t fineCentroidOffset = i % FINE_SIZE; - - const intptr_t coarseCode0 = coarse0[coarseCentroidIdx]; - const intptr_t fineCode0 = fine0[fineCentroidIdx]; - const intptr_t coarseCode1 = coarse1[coarseCentroidIdx]; - const intptr_t fineCode1 = fine1[fineCentroidIdx]; + intptr_t coarseCode0, coarseCode1, fineCode0, fineCode1; + if (FAISS_BIG_ENDIAN && sizeof(coarse_storage_type) == 2) { + coarseCode0 = Swap2Bytes(coarse0[coarseCentroidIdx]); + fineCode0 = Swap2Bytes(fine0[fineCentroidIdx]); + coarseCode1 = Swap2Bytes(coarse1[coarseCentroidIdx]); + fineCode1 = Swap2Bytes(fine1[fineCentroidIdx]); + } else { + coarseCode0 = coarse0[coarseCentroidIdx]; + fineCode0 = fine0[fineCentroidIdx]; + coarseCode1 = coarse1[coarseCentroidIdx]; + fineCode1 = fine1[fineCentroidIdx]; + } const float* const __restrict coarsePtr0 = pqCoarseCentroids0 + (coarseCentroidIdx * COARSE_TABLE_BYTES + coarseCode0) * @@ -222,11 +248,18 @@ struct Index2LevelDecoder { const intptr_t coarseCentroidOffset = i % COARSE_SIZE; const intptr_t fineCentroidIdx = i / FINE_SIZE; const intptr_t fineCentroidOffset = i % FINE_SIZE; - - const intptr_t coarseCode0 = coarse0[coarseCentroidIdx]; - const intptr_t fineCode0 = fine0[fineCentroidIdx]; - const intptr_t coarseCode1 = coarse1[coarseCentroidIdx]; - const intptr_t fineCode1 = fine1[fineCentroidIdx]; + intptr_t coarseCode0, coarseCode1, fineCode0, fineCode1; + if (FAISS_BIG_ENDIAN && sizeof(coarse_storage_type) == 2) { + coarseCode0 = Swap2Bytes(coarse0[coarseCentroidIdx]); + fineCode0 = Swap2Bytes(fine0[fineCentroidIdx]); + coarseCode1 = Swap2Bytes(coarse1[coarseCentroidIdx]); + fineCode1 = Swap2Bytes(fine1[fineCentroidIdx]); + } else { + coarseCode0 = coarse0[coarseCentroidIdx]; + fineCode0 = fine0[fineCentroidIdx]; + coarseCode1 = coarse1[coarseCentroidIdx]; + fineCode1 = fine1[fineCentroidIdx]; + } const float* const __restrict coarsePtr0 = pqCoarseCentroids + (coarseCentroidIdx * COARSE_TABLE_BYTES + coarseCode0) * @@ -292,13 +325,23 @@ struct Index2LevelDecoder { const intptr_t coarseCentroidOffset = i % COARSE_SIZE; const intptr_t fineCentroidIdx = i / FINE_SIZE; const intptr_t fineCentroidOffset = i % FINE_SIZE; - - const intptr_t coarseCode0 = coarse0[coarseCentroidIdx]; - const intptr_t fineCode0 = fine0[fineCentroidIdx]; - const intptr_t coarseCode1 = coarse1[coarseCentroidIdx]; - const intptr_t fineCode1 = fine1[fineCentroidIdx]; - const intptr_t coarseCode2 = coarse2[coarseCentroidIdx]; - const intptr_t fineCode2 = fine2[fineCentroidIdx]; + intptr_t coarseCode0, coarseCode1, fineCode0, fineCode1; + intptr_t coarseCode2, fineCode2; + if (FAISS_BIG_ENDIAN && sizeof(coarse_storage_type) == 2) { + coarseCode0 = Swap2Bytes(coarse0[coarseCentroidIdx]); + fineCode0 = Swap2Bytes(fine0[fineCentroidIdx]); + coarseCode1 = Swap2Bytes(coarse1[coarseCentroidIdx]); + fineCode1 = Swap2Bytes(fine1[fineCentroidIdx]); + coarseCode2 = Swap2Bytes(coarse2[coarseCentroidIdx]); + fineCode2 = Swap2Bytes(fine2[fineCentroidIdx]); + } else { + coarseCode0 = coarse0[coarseCentroidIdx]; + fineCode0 = fine0[fineCentroidIdx]; + coarseCode1 = coarse1[coarseCentroidIdx]; + fineCode1 = fine1[fineCentroidIdx]; + coarseCode2 = coarse2[coarseCentroidIdx]; + fineCode2 = fine2[fineCentroidIdx]; + } const float* const __restrict coarsePtr0 = pqCoarseCentroids0 + (coarseCentroidIdx * COARSE_TABLE_BYTES + coarseCode0) * @@ -369,13 +412,23 @@ struct Index2LevelDecoder { const intptr_t coarseCentroidOffset = i % COARSE_SIZE; const intptr_t fineCentroidIdx = i / FINE_SIZE; const intptr_t fineCentroidOffset = i % FINE_SIZE; - - const intptr_t coarseCode0 = coarse0[coarseCentroidIdx]; - const intptr_t fineCode0 = fine0[fineCentroidIdx]; - const intptr_t coarseCode1 = coarse1[coarseCentroidIdx]; - const intptr_t fineCode1 = fine1[fineCentroidIdx]; - const intptr_t coarseCode2 = coarse2[coarseCentroidIdx]; - const intptr_t fineCode2 = fine2[fineCentroidIdx]; + intptr_t coarseCode0, fineCode0, coarseCode1, fineCode1; + intptr_t coarseCode2, fineCode2; + if (FAISS_BIG_ENDIAN && sizeof(coarse_storage_type) == 2) { + coarseCode0 = Swap2Bytes(coarse0[coarseCentroidIdx]); + fineCode0 = Swap2Bytes(fine0[fineCentroidIdx]); + coarseCode1 = Swap2Bytes(coarse1[coarseCentroidIdx]); + fineCode1 = Swap2Bytes(fine1[fineCentroidIdx]); + coarseCode2 = Swap2Bytes(coarse2[coarseCentroidIdx]); + fineCode2 = Swap2Bytes(fine2[fineCentroidIdx]); + } else { + coarseCode0 = coarse0[coarseCentroidIdx]; + fineCode0 = fine0[fineCentroidIdx]; + coarseCode1 = coarse1[coarseCentroidIdx]; + fineCode1 = fine1[fineCentroidIdx]; + coarseCode2 = coarse2[coarseCentroidIdx]; + fineCode2 = fine2[fineCentroidIdx]; + } const float* const __restrict coarsePtr0 = pqCoarseCentroids + (coarseCentroidIdx * COARSE_TABLE_BYTES + coarseCode0) * From 82eac21eda2680b42143cad2c2f06172893700c9 Mon Sep 17 00:00:00 2001 From: Aditya Vidyadhar Kamath Date: Mon, 8 Apr 2024 09:14:35 -0500 Subject: [PATCH 08/15] fix Format issues --- faiss/cppcontrib/detail/UintReader.h | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/faiss/cppcontrib/detail/UintReader.h b/faiss/cppcontrib/detail/UintReader.h index a6439f6bb1..94ca580444 100644 --- a/faiss/cppcontrib/detail/UintReader.h +++ b/faiss/cppcontrib/detail/UintReader.h @@ -7,8 +7,8 @@ #pragma once -#include #include +#include #ifdef FAISS_BIG_ENDIAN #define Swap2Bytes(val) ((((val) >> 8) & 0x00FF) | (((val) << 8) & 0xFF00)) @@ -42,9 +42,9 @@ struct Uint8Reader { const uint32_t code32 = *reinterpret_cast( codes + ELEMENT_TO_READ * 4); #ifdef FAISS_BIG_ENDIAN - return (code32) >> 24; + return (code32) >> 24; #else - return (code32 & 0x000000FF); + return (code32 & 0x000000FF); #endif } else { return codes[CPOS]; @@ -286,14 +286,14 @@ struct Uint16Reader { #endif return (code32 & 0x0000FFFF); } else { - const uint16_t* const __restrict codesFp16 = + const uint16_t* const __restrict codesFp16 = reinterpret_cast(codes); #ifdef FAISS_BIG_ENDIAN - uint16_t rt = codesFp16[CPOS]; - rt=Swap2Bytes(rt); - return rt; + uint16_t rt = codesFp16[CPOS]; + rt=Swap2Bytes(rt); + return rt; #endif - return codesFp16[CPOS]; + return codesFp16[CPOS]; } } case 1: { @@ -309,7 +309,7 @@ struct Uint16Reader { reinterpret_cast(codes); #ifdef FAISS_BIG_ENDIAN uint16_t rt = codesFp16[CPOS]; - rt=Swap2Bytes(rt); + rt = Swap2Bytes(rt); return rt; #endif return codesFp16[CPOS]; From 391f93120efef918c91794be846d386131f84a45 Mon Sep 17 00:00:00 2001 From: Aditya Vidyadhar Kamath Date: Mon, 8 Apr 2024 09:18:01 -0500 Subject: [PATCH 09/15] fix Format issues --- faiss/cppcontrib/detail/UintReader.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/faiss/cppcontrib/detail/UintReader.h b/faiss/cppcontrib/detail/UintReader.h index 94ca580444..d0b583695d 100644 --- a/faiss/cppcontrib/detail/UintReader.h +++ b/faiss/cppcontrib/detail/UintReader.h @@ -290,7 +290,7 @@ struct Uint16Reader { reinterpret_cast(codes); #ifdef FAISS_BIG_ENDIAN uint16_t rt = codesFp16[CPOS]; - rt=Swap2Bytes(rt); + rt = Swap2Bytes(rt); return rt; #endif return codesFp16[CPOS]; From 553eba6e5a7c159b10ccede1520b948bd44e3f6b Mon Sep 17 00:00:00 2001 From: Aditya Vidyadhar Kamath Date: Mon, 8 Apr 2024 09:31:29 -0500 Subject: [PATCH 10/15] Add Swap2Bytes definition for little endian in Level2-inl.h --- faiss/cppcontrib/sa_decode/Level2-inl.h | 1 + 1 file changed, 1 insertion(+) diff --git a/faiss/cppcontrib/sa_decode/Level2-inl.h b/faiss/cppcontrib/sa_decode/Level2-inl.h index 7df97b6d69..4f9bdc4080 100644 --- a/faiss/cppcontrib/sa_decode/Level2-inl.h +++ b/faiss/cppcontrib/sa_decode/Level2-inl.h @@ -20,6 +20,7 @@ #ifndef FAISS_BIG_ENDIAN #define FAISS_BIG_ENDIAN 0 +#define Swap2Bytes(val) val #endif namespace faiss { From be9f6ef6d06b5819df4ef698b94a1dfc766bc119 Mon Sep 17 00:00:00 2001 From: Aditya Vidyadhar Kamath Date: Thu, 11 Apr 2024 12:41:21 -0500 Subject: [PATCH 11/15] Make FAISS_BIG_ENDIAN definition only in platform-macros.h --- faiss/cppcontrib/detail/UintReader.h | 55 +++++++++++-------------- faiss/cppcontrib/sa_decode/Level2-inl.h | 9 ---- faiss/impl/platform_macros.h | 8 ++++ faiss/impl/pq4_fast_scan.cpp | 4 +- 4 files changed, 33 insertions(+), 43 deletions(-) diff --git a/faiss/cppcontrib/detail/UintReader.h b/faiss/cppcontrib/detail/UintReader.h index d0b583695d..20102d9a23 100644 --- a/faiss/cppcontrib/detail/UintReader.h +++ b/faiss/cppcontrib/detail/UintReader.h @@ -10,15 +10,6 @@ #include #include -#ifdef FAISS_BIG_ENDIAN -#define Swap2Bytes(val) ((((val) >> 8) & 0x00FF) | (((val) << 8) & 0xFF00)) - -#define Swap4Bytes(val) \ - ((((val) >> 24) & 0x000000FF) | (((val) >> 8) & 0x0000FF00) | \ - (((val) << 8) & 0x00FF0000) | (((val) << 24) & 0xFF000000)) - -#endif - namespace faiss { namespace cppcontrib { namespace detail { @@ -41,7 +32,7 @@ struct Uint8Reader { if (N_ELEMENTS > CPOS + 3) { const uint32_t code32 = *reinterpret_cast( codes + ELEMENT_TO_READ * 4); -#ifdef FAISS_BIG_ENDIAN +#if FAISS_BIG_ENDIAN return (code32) >> 24; #else return (code32 & 0x000000FF); @@ -54,7 +45,7 @@ struct Uint8Reader { if (N_ELEMENTS > CPOS + 2) { const uint32_t code32 = *reinterpret_cast( codes + ELEMENT_TO_READ * 4); -#ifdef FAISS_BIG_ENDIAN +#if FAISS_BIG_ENDIAN return (code32 & 0x00FF0000) >> 16; #else return (code32 & 0x0000FF00) >> 8; @@ -67,7 +58,7 @@ struct Uint8Reader { if (N_ELEMENTS > CPOS + 1) { const uint32_t code32 = *reinterpret_cast( codes + ELEMENT_TO_READ * 4); -#ifdef FAISS_BIG_ENDIAN +#if FAISS_BIG_ENDIAN return (code32 & 0x0000FF00) >> 8; #else return (code32 & 0x00FF0000) >> 16; @@ -80,7 +71,7 @@ struct Uint8Reader { if (N_ELEMENTS > CPOS) { const uint32_t code32 = *reinterpret_cast( codes + ELEMENT_TO_READ * 4); -#ifdef FAISS_BIG_ENDIAN +#if FAISS_BIG_ENDIAN return (code32 & 0x000000FF); #else return (code32) >> 24; @@ -115,14 +106,14 @@ struct Uint10Reader { if (N_ELEMENTS > CPOS + 2) { uint32_t code32 = *reinterpret_cast( codes + ELEMENT_TO_READ * 5); -#ifdef FAISS_BIG_ENDIAN +#if FAISS_BIG_ENDIAN code32 = Swap4Bytes(code32); #endif return (code32 & 0b0000001111111111); } else { uint16_t code16 = *reinterpret_cast( codes + ELEMENT_TO_READ * 5 + 0); -#ifdef FAISS_BIG_ENDIAN +#if FAISS_BIG_ENDIAN code16 = Swap2Bytes(code16); #endif return (code16 & 0b0000001111111111); @@ -132,14 +123,14 @@ struct Uint10Reader { if (N_ELEMENTS > CPOS + 1) { uint32_t code32 = *reinterpret_cast( codes + ELEMENT_TO_READ * 5); -#ifdef FAISS_BIG_ENDIAN +#if FAISS_BIG_ENDIAN code32 = Swap4Bytes(code32); #endif return (code32 & 0b000011111111110000000000) >> 10; } else { uint16_t code16 = *reinterpret_cast( codes + ELEMENT_TO_READ * 5 + 1); -#ifdef FAISS_BIG_ENDIAN +#if FAISS_BIG_ENDIAN code16 = Swap2Bytes(code16); #endif return (code16 & 0b0000111111111100) >> 2; @@ -149,14 +140,14 @@ struct Uint10Reader { if (N_ELEMENTS > CPOS) { uint32_t code32 = *reinterpret_cast( codes + ELEMENT_TO_READ * 5); -#ifdef FAISS_BIG_ENDIAN +#if FAISS_BIG_ENDIAN code32 = Swap4Bytes(code32); #endif return (code32 & 0b00111111111100000000000000000000) >> 20; } else { uint16_t code16 = *reinterpret_cast( codes + ELEMENT_TO_READ * 5 + 2); -#ifdef FAISS_BIG_ENDIAN +#if FAISS_BIG_ENDIAN code16 = Swap2Bytes(code16); #endif return (code16 & 0b0011111111110000) >> 4; @@ -165,7 +156,7 @@ struct Uint10Reader { case 3: { uint16_t code16 = *reinterpret_cast( codes + ELEMENT_TO_READ * 5 + 3); -#ifdef FAISS_BIG_ENDIAN +#if FAISS_BIG_ENDIAN code16 = Swap2Bytes(code16); #endif return (code16 & 0b1111111111000000) >> 6; @@ -196,14 +187,14 @@ struct Uint12Reader { if (N_ELEMENTS > CPOS + 2) { uint32_t code32 = *reinterpret_cast( codes + ELEMENT_TO_READ * 6); -#ifdef FAISS_BIG_ENDIAN +#if FAISS_BIG_ENDIAN code32 = Swap4Bytes(code32); #endif return (code32 & 0b0000111111111111); } else { uint16_t code16 = *reinterpret_cast( codes + ELEMENT_TO_READ * 6 + 0); -#ifdef FAISS_BIG_ENDIAN +#if FAISS_BIG_ENDIAN code16 = Swap2Bytes(code16); #endif return (code16 & 0b0000111111111111); @@ -213,14 +204,14 @@ struct Uint12Reader { if (N_ELEMENTS > CPOS + 1) { uint32_t code32 = *reinterpret_cast( codes + ELEMENT_TO_READ * 6); -#ifdef FAISS_BIG_ENDIAN +#if FAISS_BIG_ENDIAN code32 = Swap4Bytes(code32); #endif return (code32 & 0b111111111111000000000000) >> 12; } else { uint16_t code16 = *reinterpret_cast( codes + ELEMENT_TO_READ * 6 + 1); -#ifdef FAISS_BIG_ENDIAN +#if FAISS_BIG_ENDIAN code16 = Swap2Bytes(code16); #endif return (code16 & 0b1111111111110000) >> 4; @@ -230,14 +221,14 @@ struct Uint12Reader { if (N_ELEMENTS > CPOS + 1) { uint32_t code32 = *reinterpret_cast( codes + ELEMENT_TO_READ * 6 + 2); -#ifdef FAISS_BIG_ENDIAN +#if FAISS_BIG_ENDIAN code32 = Swap4Bytes(code32); #endif return (code32 & 0b000011111111111100000000) >> 8; } else { uint16_t code16 = *reinterpret_cast( codes + ELEMENT_TO_READ * 6 + 3); -#ifdef FAISS_BIG_ENDIAN +#if FAISS_BIG_ENDIAN code16 = Swap2Bytes(code16); #endif return (code16 & 0b0000111111111111); @@ -247,14 +238,14 @@ struct Uint12Reader { if (N_ELEMENTS > CPOS) { uint32_t code32 = *reinterpret_cast( codes + ELEMENT_TO_READ * 6 + 2); -#ifdef FAISS_BIG_ENDIAN +#if FAISS_BIG_ENDIAN code32 = Swap4Bytes(code32); #endif return (code32 & 0b11111111111100000000000000000000) >> 20; } else { uint16_t code16 = *reinterpret_cast( codes + ELEMENT_TO_READ * 6 + 4); -#ifdef FAISS_BIG_ENDIAN +#if FAISS_BIG_ENDIAN code16 = Swap2Bytes(code16); #endif return (code16 & 0b1111111111110000) >> 4; @@ -281,14 +272,14 @@ struct Uint16Reader { if (N_ELEMENTS > CPOS + 1) { uint32_t code32 = *reinterpret_cast( codes + ELEMENT_TO_READ * 4); -#ifdef FAISS_BIG_ENDIAN +#if FAISS_BIG_ENDIAN code32 = Swap4Bytes(code32); #endif return (code32 & 0x0000FFFF); } else { const uint16_t* const __restrict codesFp16 = reinterpret_cast(codes); -#ifdef FAISS_BIG_ENDIAN +#if FAISS_BIG_ENDIAN uint16_t rt = codesFp16[CPOS]; rt = Swap2Bytes(rt); return rt; @@ -300,14 +291,14 @@ struct Uint16Reader { if (N_ELEMENTS > CPOS) { uint32_t code32 = *reinterpret_cast( codes + ELEMENT_TO_READ * 4); -#ifdef FAISS_BIG_ENDIAN +#if FAISS_BIG_ENDIAN code32 = Swap4Bytes(code32); #endif return code32 >> 16; } else { const uint16_t* const __restrict codesFp16 = reinterpret_cast(codes); -#ifdef FAISS_BIG_ENDIAN +#if FAISS_BIG_ENDIAN uint16_t rt = codesFp16[CPOS]; rt = Swap2Bytes(rt); return rt; diff --git a/faiss/cppcontrib/sa_decode/Level2-inl.h b/faiss/cppcontrib/sa_decode/Level2-inl.h index 4f9bdc4080..ceda063ba3 100644 --- a/faiss/cppcontrib/sa_decode/Level2-inl.h +++ b/faiss/cppcontrib/sa_decode/Level2-inl.h @@ -14,15 +14,6 @@ #include #include -#ifdef FAISS_BIG_ENDIAN -#define Swap2Bytes(val) ((((val) >> 8) & 0x00FF) | (((val) << 8) & 0xFF00)) -#endif - -#ifndef FAISS_BIG_ENDIAN -#define FAISS_BIG_ENDIAN 0 -#define Swap2Bytes(val) val -#endif - namespace faiss { namespace cppcontrib { diff --git a/faiss/impl/platform_macros.h b/faiss/impl/platform_macros.h index 57e2343d1d..68d9ac8e09 100644 --- a/faiss/impl/platform_macros.h +++ b/faiss/impl/platform_macros.h @@ -169,7 +169,15 @@ inline int __builtin_clzll(uint64_t x) { /******************************************************* * BIGENDIAN specific macros *******************************************************/ +#define FAISS_BIG_ENDIAN 0 #if !defined(_MSC_VER) && \ (defined(__BYTE_ORDER__) && (__BYTE_ORDER__ == __ORDER_BIG_ENDIAN__)) +#undef FAISS_BIG_ENDIAN #define FAISS_BIG_ENDIAN 1 #endif + +#define Swap2Bytes(val) ((((val) >> 8) & 0x00FF) | (((val) << 8) & 0xFF00)) + +#define Swap4Bytes(val) \ + ((((val) >> 24) & 0x000000FF) | (((val) >> 8) & 0x0000FF00) | \ + (((val) << 8) & 0x00FF0000) | (((val) << 24) & 0xFF000000)) diff --git a/faiss/impl/pq4_fast_scan.cpp b/faiss/impl/pq4_fast_scan.cpp index 127646e0eb..ed7659c16b 100644 --- a/faiss/impl/pq4_fast_scan.cpp +++ b/faiss/impl/pq4_fast_scan.cpp @@ -59,7 +59,7 @@ void pq4_pack_codes( return; } memset(blocks, 0, nb * nsq / 2); -#ifdef FAISS_BIG_ENDIAN +#if FAISS_BIG_ENDIAN const uint8_t perm0[16] = { 8, 0, 9, 1, 10, 2, 11, 3, 12, 4, 13, 5, 14, 6, 15, 7}; #else @@ -99,7 +99,7 @@ void pq4_pack_codes_range( size_t bbs, size_t nsq, uint8_t* blocks) { -#ifdef FAISS_BIG_ENDIAN +#if FAISS_BIG_ENDIAN const uint8_t perm0[16] = { 8, 0, 9, 1, 10, 2, 11, 3, 12, 4, 13, 5, 14, 6, 15, 7}; #else From 62caf799b0b43f4a92f024405988e8fd3ab1d99c Mon Sep 17 00:00:00 2001 From: Aditya Vidyadhar Kamath Date: Tue, 23 Apr 2024 02:23:51 -0500 Subject: [PATCH 12/15] Make FAISS_BIG_ENDIAN to not be defined as 0 or 1 --- faiss/cppcontrib/detail/UintReader.h | 46 ++++++++++++------------- faiss/cppcontrib/sa_decode/Level2-inl.h | 21 +++++++---- faiss/impl/platform_macros.h | 4 +-- faiss/impl/pq4_fast_scan.cpp | 4 +-- 4 files changed, 41 insertions(+), 34 deletions(-) diff --git a/faiss/cppcontrib/detail/UintReader.h b/faiss/cppcontrib/detail/UintReader.h index 20102d9a23..4a64a1a254 100644 --- a/faiss/cppcontrib/detail/UintReader.h +++ b/faiss/cppcontrib/detail/UintReader.h @@ -32,7 +32,7 @@ struct Uint8Reader { if (N_ELEMENTS > CPOS + 3) { const uint32_t code32 = *reinterpret_cast( codes + ELEMENT_TO_READ * 4); -#if FAISS_BIG_ENDIAN +#ifdef FAISS_BIG_ENDIAN return (code32) >> 24; #else return (code32 & 0x000000FF); @@ -45,7 +45,7 @@ struct Uint8Reader { if (N_ELEMENTS > CPOS + 2) { const uint32_t code32 = *reinterpret_cast( codes + ELEMENT_TO_READ * 4); -#if FAISS_BIG_ENDIAN +#ifdef FAISS_BIG_ENDIAN return (code32 & 0x00FF0000) >> 16; #else return (code32 & 0x0000FF00) >> 8; @@ -58,7 +58,7 @@ struct Uint8Reader { if (N_ELEMENTS > CPOS + 1) { const uint32_t code32 = *reinterpret_cast( codes + ELEMENT_TO_READ * 4); -#if FAISS_BIG_ENDIAN +#ifdef FAISS_BIG_ENDIAN return (code32 & 0x0000FF00) >> 8; #else return (code32 & 0x00FF0000) >> 16; @@ -71,7 +71,7 @@ struct Uint8Reader { if (N_ELEMENTS > CPOS) { const uint32_t code32 = *reinterpret_cast( codes + ELEMENT_TO_READ * 4); -#if FAISS_BIG_ENDIAN +#ifdef FAISS_BIG_ENDIAN return (code32 & 0x000000FF); #else return (code32) >> 24; @@ -106,14 +106,14 @@ struct Uint10Reader { if (N_ELEMENTS > CPOS + 2) { uint32_t code32 = *reinterpret_cast( codes + ELEMENT_TO_READ * 5); -#if FAISS_BIG_ENDIAN +#ifdef FAISS_BIG_ENDIAN code32 = Swap4Bytes(code32); #endif return (code32 & 0b0000001111111111); } else { uint16_t code16 = *reinterpret_cast( codes + ELEMENT_TO_READ * 5 + 0); -#if FAISS_BIG_ENDIAN +#ifdef FAISS_BIG_ENDIAN code16 = Swap2Bytes(code16); #endif return (code16 & 0b0000001111111111); @@ -123,14 +123,14 @@ struct Uint10Reader { if (N_ELEMENTS > CPOS + 1) { uint32_t code32 = *reinterpret_cast( codes + ELEMENT_TO_READ * 5); -#if FAISS_BIG_ENDIAN +#ifdef FAISS_BIG_ENDIAN code32 = Swap4Bytes(code32); #endif return (code32 & 0b000011111111110000000000) >> 10; } else { uint16_t code16 = *reinterpret_cast( codes + ELEMENT_TO_READ * 5 + 1); -#if FAISS_BIG_ENDIAN +#ifdef FAISS_BIG_ENDIAN code16 = Swap2Bytes(code16); #endif return (code16 & 0b0000111111111100) >> 2; @@ -140,14 +140,14 @@ struct Uint10Reader { if (N_ELEMENTS > CPOS) { uint32_t code32 = *reinterpret_cast( codes + ELEMENT_TO_READ * 5); -#if FAISS_BIG_ENDIAN +#ifdef FAISS_BIG_ENDIAN code32 = Swap4Bytes(code32); #endif return (code32 & 0b00111111111100000000000000000000) >> 20; } else { uint16_t code16 = *reinterpret_cast( codes + ELEMENT_TO_READ * 5 + 2); -#if FAISS_BIG_ENDIAN +#ifdef FAISS_BIG_ENDIAN code16 = Swap2Bytes(code16); #endif return (code16 & 0b0011111111110000) >> 4; @@ -156,7 +156,7 @@ struct Uint10Reader { case 3: { uint16_t code16 = *reinterpret_cast( codes + ELEMENT_TO_READ * 5 + 3); -#if FAISS_BIG_ENDIAN +#ifdef FAISS_BIG_ENDIAN code16 = Swap2Bytes(code16); #endif return (code16 & 0b1111111111000000) >> 6; @@ -187,14 +187,14 @@ struct Uint12Reader { if (N_ELEMENTS > CPOS + 2) { uint32_t code32 = *reinterpret_cast( codes + ELEMENT_TO_READ * 6); -#if FAISS_BIG_ENDIAN +#ifdef FAISS_BIG_ENDIAN code32 = Swap4Bytes(code32); #endif return (code32 & 0b0000111111111111); } else { uint16_t code16 = *reinterpret_cast( codes + ELEMENT_TO_READ * 6 + 0); -#if FAISS_BIG_ENDIAN +#ifdef FAISS_BIG_ENDIAN code16 = Swap2Bytes(code16); #endif return (code16 & 0b0000111111111111); @@ -204,14 +204,14 @@ struct Uint12Reader { if (N_ELEMENTS > CPOS + 1) { uint32_t code32 = *reinterpret_cast( codes + ELEMENT_TO_READ * 6); -#if FAISS_BIG_ENDIAN +#ifdef FAISS_BIG_ENDIAN code32 = Swap4Bytes(code32); #endif return (code32 & 0b111111111111000000000000) >> 12; } else { uint16_t code16 = *reinterpret_cast( codes + ELEMENT_TO_READ * 6 + 1); -#if FAISS_BIG_ENDIAN +#ifdef FAISS_BIG_ENDIAN code16 = Swap2Bytes(code16); #endif return (code16 & 0b1111111111110000) >> 4; @@ -221,14 +221,14 @@ struct Uint12Reader { if (N_ELEMENTS > CPOS + 1) { uint32_t code32 = *reinterpret_cast( codes + ELEMENT_TO_READ * 6 + 2); -#if FAISS_BIG_ENDIAN +#ifdef FAISS_BIG_ENDIAN code32 = Swap4Bytes(code32); #endif return (code32 & 0b000011111111111100000000) >> 8; } else { uint16_t code16 = *reinterpret_cast( codes + ELEMENT_TO_READ * 6 + 3); -#if FAISS_BIG_ENDIAN +#ifdef FAISS_BIG_ENDIAN code16 = Swap2Bytes(code16); #endif return (code16 & 0b0000111111111111); @@ -238,14 +238,14 @@ struct Uint12Reader { if (N_ELEMENTS > CPOS) { uint32_t code32 = *reinterpret_cast( codes + ELEMENT_TO_READ * 6 + 2); -#if FAISS_BIG_ENDIAN +#ifdef FAISS_BIG_ENDIAN code32 = Swap4Bytes(code32); #endif return (code32 & 0b11111111111100000000000000000000) >> 20; } else { uint16_t code16 = *reinterpret_cast( codes + ELEMENT_TO_READ * 6 + 4); -#if FAISS_BIG_ENDIAN +#ifdef FAISS_BIG_ENDIAN code16 = Swap2Bytes(code16); #endif return (code16 & 0b1111111111110000) >> 4; @@ -272,14 +272,14 @@ struct Uint16Reader { if (N_ELEMENTS > CPOS + 1) { uint32_t code32 = *reinterpret_cast( codes + ELEMENT_TO_READ * 4); -#if FAISS_BIG_ENDIAN +#ifdef FAISS_BIG_ENDIAN code32 = Swap4Bytes(code32); #endif return (code32 & 0x0000FFFF); } else { const uint16_t* const __restrict codesFp16 = reinterpret_cast(codes); -#if FAISS_BIG_ENDIAN +#ifdef FAISS_BIG_ENDIAN uint16_t rt = codesFp16[CPOS]; rt = Swap2Bytes(rt); return rt; @@ -291,14 +291,14 @@ struct Uint16Reader { if (N_ELEMENTS > CPOS) { uint32_t code32 = *reinterpret_cast( codes + ELEMENT_TO_READ * 4); -#if FAISS_BIG_ENDIAN +#ifdef FAISS_BIG_ENDIAN code32 = Swap4Bytes(code32); #endif return code32 >> 16; } else { const uint16_t* const __restrict codesFp16 = reinterpret_cast(codes); -#if FAISS_BIG_ENDIAN +#ifdef FAISS_BIG_ENDIAN uint16_t rt = codesFp16[CPOS]; rt = Swap2Bytes(rt); return rt; diff --git a/faiss/cppcontrib/sa_decode/Level2-inl.h b/faiss/cppcontrib/sa_decode/Level2-inl.h index ceda063ba3..07fcccf5a1 100644 --- a/faiss/cppcontrib/sa_decode/Level2-inl.h +++ b/faiss/cppcontrib/sa_decode/Level2-inl.h @@ -14,6 +14,15 @@ #include #include +bool isBigEndian() { + #ifdef FAISS_BIG_ENDIAN + return true; + #else + return false; + #endif +} + + namespace faiss { namespace cppcontrib { @@ -74,7 +83,7 @@ struct Index2LevelDecoder { const intptr_t fineCentroidIdx = i / FINE_SIZE; const intptr_t fineCentroidOffset = i % FINE_SIZE; intptr_t coarseCode, fineCode; - if (FAISS_BIG_ENDIAN && sizeof(coarse_storage_type) == 2) { + if (isBigEndian() && sizeof(coarse_storage_type) == 2) { coarseCode = Swap2Bytes(coarse[coarseCentroidIdx]); fineCode = Swap2Bytes(fine[fineCentroidIdx]); } else { @@ -119,7 +128,7 @@ struct Index2LevelDecoder { const intptr_t fineCentroidOffset = i % FINE_SIZE; intptr_t coarseCode, fineCode; - if (FAISS_BIG_ENDIAN && sizeof(coarse_storage_type) == 2) { + if (isBigEndian() && sizeof(coarse_storage_type) == 2) { coarseCode = Swap2Bytes(coarse[coarseCentroidIdx]); fineCode = Swap2Bytes(fine[fineCentroidIdx]); } else { @@ -174,7 +183,7 @@ struct Index2LevelDecoder { const intptr_t fineCentroidIdx = i / FINE_SIZE; const intptr_t fineCentroidOffset = i % FINE_SIZE; intptr_t coarseCode0, coarseCode1, fineCode0, fineCode1; - if (FAISS_BIG_ENDIAN && sizeof(coarse_storage_type) == 2) { + if (isBigEndian() && sizeof(coarse_storage_type) == 2) { coarseCode0 = Swap2Bytes(coarse0[coarseCentroidIdx]); fineCode0 = Swap2Bytes(fine0[fineCentroidIdx]); coarseCode1 = Swap2Bytes(coarse1[coarseCentroidIdx]); @@ -241,7 +250,7 @@ struct Index2LevelDecoder { const intptr_t fineCentroidIdx = i / FINE_SIZE; const intptr_t fineCentroidOffset = i % FINE_SIZE; intptr_t coarseCode0, coarseCode1, fineCode0, fineCode1; - if (FAISS_BIG_ENDIAN && sizeof(coarse_storage_type) == 2) { + if (isBigEndian() && sizeof(coarse_storage_type) == 2) { coarseCode0 = Swap2Bytes(coarse0[coarseCentroidIdx]); fineCode0 = Swap2Bytes(fine0[fineCentroidIdx]); coarseCode1 = Swap2Bytes(coarse1[coarseCentroidIdx]); @@ -319,7 +328,7 @@ struct Index2LevelDecoder { const intptr_t fineCentroidOffset = i % FINE_SIZE; intptr_t coarseCode0, coarseCode1, fineCode0, fineCode1; intptr_t coarseCode2, fineCode2; - if (FAISS_BIG_ENDIAN && sizeof(coarse_storage_type) == 2) { + if (isBigEndian() && sizeof(coarse_storage_type) == 2) { coarseCode0 = Swap2Bytes(coarse0[coarseCentroidIdx]); fineCode0 = Swap2Bytes(fine0[fineCentroidIdx]); coarseCode1 = Swap2Bytes(coarse1[coarseCentroidIdx]); @@ -406,7 +415,7 @@ struct Index2LevelDecoder { const intptr_t fineCentroidOffset = i % FINE_SIZE; intptr_t coarseCode0, fineCode0, coarseCode1, fineCode1; intptr_t coarseCode2, fineCode2; - if (FAISS_BIG_ENDIAN && sizeof(coarse_storage_type) == 2) { + if (isBigEndian() && sizeof(coarse_storage_type) == 2) { coarseCode0 = Swap2Bytes(coarse0[coarseCentroidIdx]); fineCode0 = Swap2Bytes(fine0[fineCentroidIdx]); coarseCode1 = Swap2Bytes(coarse1[coarseCentroidIdx]); diff --git a/faiss/impl/platform_macros.h b/faiss/impl/platform_macros.h index 68d9ac8e09..a0faea7cba 100644 --- a/faiss/impl/platform_macros.h +++ b/faiss/impl/platform_macros.h @@ -169,11 +169,9 @@ inline int __builtin_clzll(uint64_t x) { /******************************************************* * BIGENDIAN specific macros *******************************************************/ -#define FAISS_BIG_ENDIAN 0 #if !defined(_MSC_VER) && \ (defined(__BYTE_ORDER__) && (__BYTE_ORDER__ == __ORDER_BIG_ENDIAN__)) -#undef FAISS_BIG_ENDIAN -#define FAISS_BIG_ENDIAN 1 +#define FAISS_BIG_ENDIAN #endif #define Swap2Bytes(val) ((((val) >> 8) & 0x00FF) | (((val) << 8) & 0xFF00)) diff --git a/faiss/impl/pq4_fast_scan.cpp b/faiss/impl/pq4_fast_scan.cpp index ed7659c16b..127646e0eb 100644 --- a/faiss/impl/pq4_fast_scan.cpp +++ b/faiss/impl/pq4_fast_scan.cpp @@ -59,7 +59,7 @@ void pq4_pack_codes( return; } memset(blocks, 0, nb * nsq / 2); -#if FAISS_BIG_ENDIAN +#ifdef FAISS_BIG_ENDIAN const uint8_t perm0[16] = { 8, 0, 9, 1, 10, 2, 11, 3, 12, 4, 13, 5, 14, 6, 15, 7}; #else @@ -99,7 +99,7 @@ void pq4_pack_codes_range( size_t bbs, size_t nsq, uint8_t* blocks) { -#if FAISS_BIG_ENDIAN +#ifdef FAISS_BIG_ENDIAN const uint8_t perm0[16] = { 8, 0, 9, 1, 10, 2, 11, 3, 12, 4, 13, 5, 14, 6, 15, 7}; #else From dbfd236914e5350a6445496ca3d14545ab546f62 Mon Sep 17 00:00:00 2001 From: Aditya Vidyadhar Kamath Date: Tue, 23 Apr 2024 02:28:16 -0500 Subject: [PATCH 13/15] Fix Format issue --- faiss/cppcontrib/sa_decode/Level2-inl.h | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/faiss/cppcontrib/sa_decode/Level2-inl.h b/faiss/cppcontrib/sa_decode/Level2-inl.h index 07fcccf5a1..ead9c8c791 100644 --- a/faiss/cppcontrib/sa_decode/Level2-inl.h +++ b/faiss/cppcontrib/sa_decode/Level2-inl.h @@ -15,11 +15,11 @@ #include bool isBigEndian() { - #ifdef FAISS_BIG_ENDIAN - return true; - #else - return false; - #endif +#ifdef FAISS_BIG_ENDIAN + return true; +#else + return false; +#endif } From 1809905eff59702567ffdfbddf72dfda90f3bda4 Mon Sep 17 00:00:00 2001 From: Aditya Vidyadhar Kamath Date: Tue, 23 Apr 2024 02:31:44 -0500 Subject: [PATCH 14/15] Fix Format issue --- faiss/cppcontrib/sa_decode/Level2-inl.h | 1 - 1 file changed, 1 deletion(-) diff --git a/faiss/cppcontrib/sa_decode/Level2-inl.h b/faiss/cppcontrib/sa_decode/Level2-inl.h index ead9c8c791..3cc23da448 100644 --- a/faiss/cppcontrib/sa_decode/Level2-inl.h +++ b/faiss/cppcontrib/sa_decode/Level2-inl.h @@ -22,7 +22,6 @@ bool isBigEndian() { #endif } - namespace faiss { namespace cppcontrib { From d3bc22ea49333d6b6e7511f4c3422c0c41d8ffd2 Mon Sep 17 00:00:00 2001 From: Aditya Vidyadhar Kamath Date: Wed, 24 Apr 2024 01:43:39 -0500 Subject: [PATCH 15/15] Move isBigEndian to faiss::cppcontrib namespace --- faiss/cppcontrib/sa_decode/Level2-inl.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/faiss/cppcontrib/sa_decode/Level2-inl.h b/faiss/cppcontrib/sa_decode/Level2-inl.h index 3cc23da448..1eb7767ba8 100644 --- a/faiss/cppcontrib/sa_decode/Level2-inl.h +++ b/faiss/cppcontrib/sa_decode/Level2-inl.h @@ -14,6 +14,9 @@ #include #include +namespace faiss { +namespace cppcontrib { + bool isBigEndian() { #ifdef FAISS_BIG_ENDIAN return true; @@ -22,9 +25,6 @@ bool isBigEndian() { #endif } -namespace faiss { -namespace cppcontrib { - //////////////////////////////////////////////////////////////////////////////////// /// Index2LevelDecoder ////////////////////////////////////////////////////////////////////////////////////