From 5f08ae640a6176cfaf25aa22dd82f952f6f2d2ca Mon Sep 17 00:00:00 2001 From: Lloyd-Pottiger <60744015+Lloyd-Pottiger@users.noreply.github.com> Date: Tue, 27 Aug 2024 18:31:47 +0800 Subject: [PATCH] *: use SimSIMD for vectors (#9372) ref pingcap/tiflash#9032 *: use SimSIMD for vectors Signed-off-by: Lloyd-Pottiger --- .gitmodules | 3 + cmake/cpu_features.cmake | 2 +- contrib/CMakeLists.txt | 2 + contrib/simsimd | 1 + contrib/simsimd-cmake/CMakeLists.txt | 13 +++ contrib/usearch-cmake/CMakeLists.txt | 2 +- dbms/CMakeLists.txt | 18 +++- dbms/src/Common/TiFlashBuildInfo.cpp | 13 +++ .../tests/gtest_s3_lock_service.cpp | 2 +- dbms/src/Functions/FunctionsVector.h | 2 +- dbms/src/Functions/tests/gtest_vector.cpp | 55 +++++++----- .../Index/VectorIndexHNSW/Index.cpp | 12 ++- .../DeltaMerge/Index/VectorIndexHNSW/Index.h | 2 +- dbms/src/TiDB/Decode/Vector.cpp | 88 ++++++++++++------- .../src/VectorSearch/DistanceSIMDFeatures.cpp | 79 +++++++++++++++++ dbms/src/VectorSearch/DistanceSIMDFeatures.h | 29 ++++++ dbms/src/VectorSearch/SIMDFeatures.cpp | 32 +++++++ dbms/src/VectorSearch/SIMDFeatures.h | 28 ++++++ dbms/src/VectorSearch/USearch.h | 43 +++++++++ dbms/src/VectorSearch/simdsimd-internals.h | 45 ++++++++++ libs/libcommon/include/common/logger_useful.h | 2 +- libs/libdaemon/CMakeLists.txt | 2 +- libs/libdaemon/src/BaseDaemon.cpp | 2 +- 23 files changed, 404 insertions(+), 73 deletions(-) create mode 160000 contrib/simsimd create mode 100644 contrib/simsimd-cmake/CMakeLists.txt create mode 100644 dbms/src/VectorSearch/DistanceSIMDFeatures.cpp create mode 100644 dbms/src/VectorSearch/DistanceSIMDFeatures.h create mode 100644 dbms/src/VectorSearch/SIMDFeatures.cpp create mode 100644 dbms/src/VectorSearch/SIMDFeatures.h create mode 100644 dbms/src/VectorSearch/USearch.h create mode 100644 dbms/src/VectorSearch/simdsimd-internals.h diff --git a/.gitmodules b/.gitmodules index b31d1937054..bf4cfbb78b5 100644 --- a/.gitmodules +++ b/.gitmodules @@ -149,3 +149,6 @@ [submodule 
"contrib/usearch"] path = contrib/usearch url = https://github.com/unum-cloud/usearch.git +[submodule "contrib/simsimd"] + path = contrib/simsimd + url = https://github.com/ashvardanian/SimSIMD diff --git a/cmake/cpu_features.cmake b/cmake/cpu_features.cmake index 7637f3a6c37..ece1417ddfc 100644 --- a/cmake/cpu_features.cmake +++ b/cmake/cpu_features.cmake @@ -95,7 +95,7 @@ elseif (ARCH_AMD64) # so we do not set the flags to avoid core dump in old machines option (TIFLASH_ENABLE_AVX_SUPPORT "Use AVX/AVX2 instructions on x86_64" ON) option (TIFLASH_ENABLE_AVX512_SUPPORT "Use AVX512 instructions on x86_64" ON) - + # `haswell` was released since 2013 with cpu feature avx2, bmi2. It's a practical arch for optimizer option (TIFLASH_ENABLE_ARCH_HASWELL_SUPPORT "Use instructions based on architecture `haswell` on x86_64" ON) diff --git a/contrib/CMakeLists.txt b/contrib/CMakeLists.txt index fdd14e04fdf..d4ad3cc2615 100644 --- a/contrib/CMakeLists.txt +++ b/contrib/CMakeLists.txt @@ -200,3 +200,5 @@ add_subdirectory(simdjson) add_subdirectory(fastpforlib) add_subdirectory(usearch-cmake) + +add_subdirectory(simsimd-cmake) diff --git a/contrib/simsimd b/contrib/simsimd new file mode 160000 index 00000000000..3e21934fbe3 --- /dev/null +++ b/contrib/simsimd @@ -0,0 +1 @@ +Subproject commit 3e21934fbe33cbfbd175ab82a83d3cc937b21e2a diff --git a/contrib/simsimd-cmake/CMakeLists.txt b/contrib/simsimd-cmake/CMakeLists.txt new file mode 100644 index 00000000000..7b7a943a367 --- /dev/null +++ b/contrib/simsimd-cmake/CMakeLists.txt @@ -0,0 +1,13 @@ +set(SIMSIMD_PROJECT_DIR "${TiFlash_SOURCE_DIR}/contrib/simsimd") +set(SIMSIMD_SOURCE_DIR "${SIMSIMD_PROJECT_DIR}/include") + +add_library(_simsimd INTERFACE) + +if (NOT EXISTS "${SIMSIMD_SOURCE_DIR}/simsimd/simsimd.h") + message (FATAL_ERROR "submodule contrib/simsimd not found") +endif() + +target_include_directories(_simsimd SYSTEM INTERFACE + ${SIMSIMD_SOURCE_DIR}) + +add_library(tiflash_contrib::simsimd ALIAS _simsimd) diff --git 
a/contrib/usearch-cmake/CMakeLists.txt b/contrib/usearch-cmake/CMakeLists.txt index 5c2f4fb4942..740d1af9838 100644 --- a/contrib/usearch-cmake/CMakeLists.txt +++ b/contrib/usearch-cmake/CMakeLists.txt @@ -8,7 +8,7 @@ if (NOT EXISTS "${USEARCH_SOURCE_DIR}/usearch/index.hpp") endif () target_include_directories(_usearch SYSTEM INTERFACE - ${USEARCH_PROJECT_DIR}/simsimd/include + # ${USEARCH_PROJECT_DIR}/simsimd/include # Use our simsimd ${USEARCH_PROJECT_DIR}/fp16/include ${USEARCH_SOURCE_DIR}) diff --git a/dbms/CMakeLists.txt b/dbms/CMakeLists.txt index 9450602d296..5b4980e9de6 100644 --- a/dbms/CMakeLists.txt +++ b/dbms/CMakeLists.txt @@ -96,6 +96,8 @@ add_headers_and_sources(dbms src/Client) add_headers_only(dbms src/Flash/Coprocessor) add_headers_only(dbms src/Server) +add_headers_and_sources(tiflash_vector_search src/VectorSearch) + check_then_add_sources_compile_flag ( TIFLASH_ENABLE_ARCH_HASWELL_SUPPORT "${TIFLASH_COMPILER_ARCH_HASWELL_FLAG}" @@ -203,13 +205,25 @@ target_link_libraries (tiflash_common_io ) target_include_directories (tiflash_common_io BEFORE PRIVATE ${kvClient_SOURCE_DIR}/include) -target_compile_definitions(tiflash_common_io PUBLIC -DTIFLASH_SOURCE_PREFIX=\"${TiFlash_SOURCE_DIR}\") +target_compile_definitions (tiflash_common_io PUBLIC -DTIFLASH_SOURCE_PREFIX=\"${TiFlash_SOURCE_DIR}\") + +add_library(tiflash_vector_search + ${tiflash_vector_search_headers} + ${tiflash_vector_search_sources} +) +target_link_libraries(tiflash_vector_search + tiflash_contrib::usearch + tiflash_contrib::simsimd + + fmt +) + target_link_libraries (dbms ${OPENSSL_CRYPTO_LIBRARY} ${BTRIE_LIBRARIES} absl::synchronization - tiflash_contrib::usearch tiflash_contrib::aws_s3 + tiflash_vector_search etcdpb tiflash_parsers diff --git a/dbms/src/Common/TiFlashBuildInfo.cpp b/dbms/src/Common/TiFlashBuildInfo.cpp index 1ad87ea9667..e2227428233 100644 --- a/dbms/src/Common/TiFlashBuildInfo.cpp +++ b/dbms/src/Common/TiFlashBuildInfo.cpp @@ -15,6 +15,8 @@ #include #include 
#include +#include +#include #include #include #include @@ -140,6 +142,17 @@ String getEnabledFeatures() "fdo", #endif }; + { + auto f = DB::DM::VectorIndexHNSWSIMDFeatures::get(); + for (const auto & feature : f) + features.push_back(feature); + } + { + auto f = DB::VectorDistanceSIMDFeatures::get(); + for (const auto & feature : f) + features.push_back(feature); + } + return fmt::format("{}", fmt::join(features.begin(), features.end(), " ")); } // clang-format on diff --git a/dbms/src/Flash/Disaggregated/tests/gtest_s3_lock_service.cpp b/dbms/src/Flash/Disaggregated/tests/gtest_s3_lock_service.cpp index 5d220132be2..c4f6176e93d 100644 --- a/dbms/src/Flash/Disaggregated/tests/gtest_s3_lock_service.cpp +++ b/dbms/src/Flash/Disaggregated/tests/gtest_s3_lock_service.cpp @@ -110,7 +110,7 @@ class S3LockServiceTest : public DB::base::TiFlashStorageTestBasic #define CHECK_S3_ENABLED \ if (!is_s3_test_enabled) \ { \ - const auto * t = ::testing::UnitTest::GetInstance()->current_test_info(); \ + const auto * t = ::testing::UnitTest::GetInstance() -> current_test_info(); \ LOG_INFO(log, "{}.{} is skipped because S3ClientFactory is not inited.", t->test_case_name(), t->name()); \ return; \ } diff --git a/dbms/src/Functions/FunctionsVector.h b/dbms/src/Functions/FunctionsVector.h index 2e830338952..b4960200ce4 100644 --- a/dbms/src/Functions/FunctionsVector.h +++ b/dbms/src/Functions/FunctionsVector.h @@ -33,7 +33,7 @@ namespace DB { namespace ErrorCodes { -extern const int ILLEGAL_COLUMN; +extern const int ILLEGAL_TYPE_OF_ARGUMENT; } class FunctionsCastVectorFloat32AsString : public IFunction diff --git a/dbms/src/Functions/tests/gtest_vector.cpp b/dbms/src/Functions/tests/gtest_vector.cpp index d67eb683540..10c1cd668ce 100644 --- a/dbms/src/Functions/tests/gtest_vector.cpp +++ b/dbms/src/Functions/tests/gtest_vector.cpp @@ -203,29 +203,40 @@ TEST_F(Vector, CosineDistance) try { ASSERT_COLUMN_EQ( - createColumn>({0.0, std::nullopt, 0.0, 1.0, 2.0, 0.0, 2.0, std::nullopt}), + 
createColumn>( + {0.0, + 1.0, // CosDistance to (0,0) cannot be calculated, clamped to 1.0 + 0.0, + 1.0, + 2.0, + 0.0, + 2.0, + std::nullopt}), executeFunction( - "vecCosineDistance", - createColumn( - std::make_tuple(std::make_shared()), // - {Array{1.0, 2.0}, - Array{1.0, 2.0}, - Array{1.0, 1.0}, - Array{1.0, 0.0}, - Array{1.0, 1.0}, - Array{1.0, 1.0}, - Array{1.0, 1.0}, - Array{3e38}}), - createColumn( - std::make_tuple(std::make_shared()), // - {Array{2.0, 4.0}, - Array{0.0, 0.0}, - Array{1.0, 1.0}, - Array{0.0, 2.0}, - Array{-1.0, -1.0}, - Array{1.1, 1.1}, - Array{-1.1, -1.1}, - Array{3e38}}))); + "tidbRoundWithFrac", + executeFunction( + "vecCosineDistance", + createColumn( + std::make_tuple(std::make_shared()), // + {Array{1.0, 2.0}, + Array{1.0, 2.0}, + Array{1.0, 1.0}, + Array{1.0, 0.0}, + Array{1.0, 1.0}, + Array{1.0, 1.0}, + Array{1.0, 1.0}, + Array{3e38}}), + createColumn( + std::make_tuple(std::make_shared()), // + {Array{2.0, 4.0}, + Array{0.0, 0.0}, + Array{1.0, 1.0}, + Array{0.0, 2.0}, + Array{-1.0, -1.0}, + Array{1.1, 1.1}, + Array{-1.1, -1.1}, + Array{3e38}})), + createConstColumn(8, 1))); ASSERT_THROW( executeFunction( diff --git a/dbms/src/Storages/DeltaMerge/Index/VectorIndexHNSW/Index.cpp b/dbms/src/Storages/DeltaMerge/Index/VectorIndexHNSW/Index.cpp index 63fd1e69e90..43435d319e7 100644 --- a/dbms/src/Storages/DeltaMerge/Index/VectorIndexHNSW/Index.cpp +++ b/dbms/src/Storages/DeltaMerge/Index/VectorIndexHNSW/Index.cpp @@ -25,8 +25,6 @@ #include #include -#include -#include namespace DB::ErrorCodes { @@ -192,16 +190,16 @@ std::vector VectorIndexHNSWViewer::search( std::atomic discarded_nodes = 0; std::atomic has_exception_in_search = false; - // The non-valid rows should be discarded by this lambda - auto predicate = [&](typename USearchImplType::member_cref_t const & member) { + // The non-valid rows should be discarded by this lambda. 
+ auto predicate = [&](const Key & key) { // Must catch exceptions in the predicate, because search runs on other threads. try { // Note: We don't increase the thread_local perf, because search runs on other threads. visited_nodes++; - if (!valid_rows[member.key]) + if (!valid_rows[key]) discarded_nodes++; - return valid_rows[member.key]; + return valid_rows[key]; } catch (...) { @@ -215,7 +213,7 @@ std::vector VectorIndexHNSWViewer::search( SCOPE_EXIT({ GET_METRIC(tiflash_vector_index_duration, type_search).Observe(w.elapsedSeconds()); }); // TODO(vector-index): Support efSearch. - auto result = index.search( // + auto result = index.filtered_search( // reinterpret_cast(query_info->ref_vec_f32().data() + sizeof(UInt32)), query_info->top_k(), predicate); diff --git a/dbms/src/Storages/DeltaMerge/Index/VectorIndexHNSW/Index.h b/dbms/src/Storages/DeltaMerge/Index/VectorIndexHNSW/Index.h index 616cd43d262..7be1b0f254f 100644 --- a/dbms/src/Storages/DeltaMerge/Index/VectorIndexHNSW/Index.h +++ b/dbms/src/Storages/DeltaMerge/Index/VectorIndexHNSW/Index.h @@ -16,7 +16,7 @@ #include #include -#include +#include namespace DB::DM { diff --git a/dbms/src/TiDB/Decode/Vector.cpp b/dbms/src/TiDB/Decode/Vector.cpp index 6a11c5a0737..86b5f79c70a 100644 --- a/dbms/src/TiDB/Decode/Vector.cpp +++ b/dbms/src/TiDB/Decode/Vector.cpp @@ -16,6 +16,7 @@ #include #include #include +#include #include @@ -50,15 +51,25 @@ Float64 VectorFloat32Ref::l2SquaredDistance(VectorFloat32Ref b) const { checkDims(b); - Float32 distance = 0.0; - Float32 diff; + static simsimd_metric_punned_t metric = nullptr; + static std::once_flag init_flag; - for (size_t i = 0, i_max = size(); i < i_max; ++i) - { - // Hope this can be vectorized. 
- diff = elements[i] - b[i]; - distance += diff * diff; - } + std::call_once(init_flag, []() { + simsimd_capability_t used_capability; + simsimd_find_metric_punned( + simsimd_metric_l2sq_k, + simsimd_datatype_f32_k, + simsimd_details::simd_capabilities(), + simsimd_cap_any_k, + &metric, + &used_capability); + }); + + if (!metric) + return std::numeric_limits::quiet_NaN(); + + simsimd_distance_t distance; + metric(elements, b.elements, elements_n, &distance); return distance; } @@ -67,13 +78,25 @@ Float64 VectorFloat32Ref::innerProduct(VectorFloat32Ref b) const { checkDims(b); - Float32 distance = 0.0; + static simsimd_metric_punned_t metric = nullptr; + static std::once_flag init_flag; - for (size_t i = 0, i_max = size(); i < i_max; ++i) - { - // Hope this can be vectorized. - distance += elements[i] * b[i]; - } + std::call_once(init_flag, []() { + simsimd_capability_t used_capability; + simsimd_find_metric_punned( + simsimd_metric_dot_k, + simsimd_datatype_f32_k, + simsimd_details::simd_capabilities(), + simsimd_cap_any_k, + &metric, + &used_capability); + }); + + if (!metric) + return std::numeric_limits::quiet_NaN(); + + simsimd_distance_t distance; + metric(elements, b.elements, elements_n, &distance); return distance; } @@ -82,30 +105,27 @@ Float64 VectorFloat32Ref::cosineDistance(VectorFloat32Ref b) const { checkDims(b); - Float32 distance = 0.0; - Float32 norma = 0.0; - Float32 normb = 0.0; + static simsimd_metric_punned_t metric = nullptr; + static std::once_flag init_flag; - for (size_t i = 0, i_max = size(); i < i_max; ++i) - { - // Hope this can be vectorized. 
- distance += elements[i] * b[i]; - norma += elements[i] * elements[i]; - normb += b[i] * b[i]; - } + std::call_once(init_flag, []() { + simsimd_capability_t used_capability; + simsimd_find_metric_punned( + simsimd_metric_cos_k, + simsimd_datatype_f32_k, + simsimd_details::simd_capabilities(), + simsimd_cap_any_k, + &metric, + &used_capability); + }); - Float64 similarity - = static_cast(distance) / std::sqrt(static_cast(norma) * static_cast(normb)); + if (!metric) + return std::numeric_limits::quiet_NaN(); - if (std::isnan(similarity)) - { - // When norma or normb is zero, distance is zero, and similarity is NaN. - // similarity can not be Inf in this case. - return std::nan(""); - } + simsimd_distance_t distance; + metric(elements, b.elements, elements_n, &distance); - similarity = std::clamp(similarity, -1.0, 1.0); - return 1.0 - similarity; + return distance; } Float64 VectorFloat32Ref::l1Distance(VectorFloat32Ref b) const diff --git a/dbms/src/VectorSearch/DistanceSIMDFeatures.cpp b/dbms/src/VectorSearch/DistanceSIMDFeatures.cpp new file mode 100644 index 00000000000..d6dd51cf2c6 --- /dev/null +++ b/dbms/src/VectorSearch/DistanceSIMDFeatures.cpp @@ -0,0 +1,79 @@ +// Copyright 2024 PingCAP, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// SIMSIMD is header only. We don't use cmake to make these defines to avoid +// polluting all compile units. 
+ +#include +#include + +namespace simsimd_details +{ +simsimd_capability_t simd_capabilities() +{ + static simsimd_capability_t static_capabilities = simsimd_cap_any_k; + if (static_capabilities == simsimd_cap_any_k) + static_capabilities = simsimd_capabilities_implementation(); + return static_capabilities; +} + +simsimd_capability_t actual_capability(simsimd_datatype_t data_type, simsimd_metric_kind_t kind) +{ + simsimd_metric_punned_t metric = nullptr; + simsimd_capability_t used_capability; + simsimd_find_metric_punned( + kind, + data_type, + simsimd_details::simd_capabilities(), + simsimd_cap_any_k, + &metric, + &used_capability); + + return used_capability; +} +} // namespace simsimd_details + +namespace DB +{ + +std::vector VectorDistanceSIMDFeatures::get() +{ + simsimd_capability_t cap_l2 = simsimd_details::actual_capability(simsimd_datatype_f32_k, simsimd_metric_l2sq_k); + simsimd_capability_t cap_cos = simsimd_details::actual_capability(simsimd_datatype_f32_k, simsimd_metric_cos_k); + + auto cap_to_string = [](simsimd_capability_t cap) -> std::string { + switch (cap) + { + case simsimd_cap_sve2_k: + return "sve2"; + case simsimd_cap_sve_k: + return "sve"; + case simsimd_cap_neon_k: + return "neon"; + case simsimd_cap_skylake_k: + return "skylake"; + case simsimd_cap_haswell_k: + return "haswell"; + default: + return "serial"; + } + }; + + std::vector ret{}; + ret.push_back("vec.l2=" + cap_to_string(cap_l2)); + ret.push_back("vec.cos=" + cap_to_string(cap_cos)); + return ret; +} + +} // namespace DB diff --git a/dbms/src/VectorSearch/DistanceSIMDFeatures.h b/dbms/src/VectorSearch/DistanceSIMDFeatures.h new file mode 100644 index 00000000000..63807c12cd8 --- /dev/null +++ b/dbms/src/VectorSearch/DistanceSIMDFeatures.h @@ -0,0 +1,29 @@ +// Copyright 2024 PingCAP, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#pragma once + +#include +#include + +namespace DB +{ + +class VectorDistanceSIMDFeatures +{ +public: + static std::vector get(); +}; + +} // namespace DB diff --git a/dbms/src/VectorSearch/SIMDFeatures.cpp b/dbms/src/VectorSearch/SIMDFeatures.cpp new file mode 100644 index 00000000000..92eb9c4f1b2 --- /dev/null +++ b/dbms/src/VectorSearch/SIMDFeatures.cpp @@ -0,0 +1,32 @@ +// Copyright 2024 PingCAP, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#include +#include +#include + +namespace DB::DM +{ + +std::vector VectorIndexHNSWSIMDFeatures::get() +{ + auto m_l2 = unum::usearch::metric_punned_t(3, unum::usearch::metric_kind_t::l2sq_k); + auto m_cos = unum::usearch::metric_punned_t(3, unum::usearch::metric_kind_t::cos_k); + return { + fmt::format("hnsw.l2={}", m_l2.isa_name()), + fmt::format("hnsw.cosine={}", m_cos.isa_name()), + }; +} + +} // namespace DB::DM diff --git a/dbms/src/VectorSearch/SIMDFeatures.h b/dbms/src/VectorSearch/SIMDFeatures.h new file mode 100644 index 00000000000..28ed4bcd9a2 --- /dev/null +++ b/dbms/src/VectorSearch/SIMDFeatures.h @@ -0,0 +1,28 @@ +// Copyright 2024 PingCAP, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#pragma once + +#include + +namespace DB::DM +{ + +class VectorIndexHNSWSIMDFeatures +{ +public: + static std::vector get(); +}; + +} // namespace DB::DM diff --git a/dbms/src/VectorSearch/USearch.h b/dbms/src/VectorSearch/USearch.h new file mode 100644 index 00000000000..a54bc7f08c8 --- /dev/null +++ b/dbms/src/VectorSearch/USearch.h @@ -0,0 +1,43 @@ +// Copyright 2024 PingCAP, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#pragma once + +// USearch is header only. We don't use cmake to make these defines to avoid +// polluting all compile units. + +#define USEARCH_USE_SIMSIMD 1 +#define SIMSIMD_NATIVE_F16 0 +#define SIMSIMD_NATIVE_BF16 0 + +// Force enable all target features. +#define SIMSIMD_TARGET_NEON 1 +#define SIMSIMD_TARGET_SVE 0 // Clang13's header does not support enabling SVE for region +#define SIMSIMD_TARGET_HASWELL 1 +#define SIMSIMD_TARGET_SKYLAKE 0 // Clang13 does not support AVX512 +#define SIMSIMD_TARGET_ICE 0 +#define SIMSIMD_TARGET_GENOA 0 +#define SIMSIMD_TARGET_SAPPHIRE 0 + +#if __clang__ +#pragma clang diagnostic push +#pragma clang diagnostic ignored "-Wpass-failed" + +#include +#include +#include +#include + +#pragma clang diagnostic pop +#endif diff --git a/dbms/src/VectorSearch/simdsimd-internals.h b/dbms/src/VectorSearch/simdsimd-internals.h new file mode 100644 index 00000000000..45ded1a1647 --- /dev/null +++ b/dbms/src/VectorSearch/simdsimd-internals.h @@ -0,0 +1,45 @@ +// Copyright 2024 PingCAP, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+// See the License for the specific language governing permissions and +// limitations under the License. + +// SIMSIMD is header only. We don't use cmake to make these defines to avoid +// polluting all compile units. + +#pragma once + +// Note: Be careful that usearch also includes simsimd with a customized config. +// Don't include simsimd and usearch at the same time. Otherwise, the effective +// config depends on the include order. +#define SIMSIMD_NATIVE_F16 0 +#define SIMSIMD_NATIVE_BF16 0 +#define SIMSIMD_DYNAMIC_DISPATCH 0 + +// Force enable all target features. We will do our own dynamic dispatch. +#define SIMSIMD_TARGET_NEON 1 +#define SIMSIMD_TARGET_SVE 0 // Clang13's header does not support enabling SVE for region +#define SIMSIMD_TARGET_HASWELL 1 +#define SIMSIMD_TARGET_SKYLAKE 1 +#define SIMSIMD_TARGET_ICE 0 +#define SIMSIMD_TARGET_GENOA 0 +#define SIMSIMD_TARGET_SAPPHIRE 0 +#include + + +namespace simsimd_details +{ + +simsimd_capability_t simd_capabilities(); + +simsimd_capability_t actual_capability(simsimd_datatype_t data_type, simsimd_metric_kind_t kind); + +} // namespace simsimd_details diff --git a/libs/libcommon/include/common/logger_useful.h b/libs/libcommon/include/common/logger_useful.h index f28c1919a44..f4902bc6c7b 100644 --- a/libs/libcommon/include/common/logger_useful.h +++ b/libs/libcommon/include/common/logger_useful.h @@ -45,7 +45,7 @@ inline constexpr size_t getFileNameOffset(const T (&str)[S], size_t i = S - 1) } template -inline constexpr size_t getFileNameOffset(T (&/*str*/)[1]) +inline constexpr size_t getFileNameOffset(T (& /*str*/)[1]) { return 0; } diff --git a/libs/libdaemon/CMakeLists.txt b/libs/libdaemon/CMakeLists.txt index 22589259caf..b5107576316 100644 --- a/libs/libdaemon/CMakeLists.txt +++ b/libs/libdaemon/CMakeLists.txt @@ -35,7 +35,7 @@ endif () target_include_directories (daemon PUBLIC include) target_include_directories (daemon PRIVATE ${TiFlash_SOURCE_DIR}/libs/libpocoext/include) -target_link_libraries 
(daemon tiflash_common_io tiflash_common_config grpc grpc++ ${EXECINFO_LIBRARY}) +target_link_libraries (daemon tiflash_vector_search tiflash_common_io tiflash_common_config grpc grpc++ ${EXECINFO_LIBRARY}) if (ENABLE_TESTS) add_subdirectory (src/tests EXCLUDE_FROM_ALL) endif () diff --git a/libs/libdaemon/src/BaseDaemon.cpp b/libs/libdaemon/src/BaseDaemon.cpp index a3975226bf1..c4ec808a725 100644 --- a/libs/libdaemon/src/BaseDaemon.cpp +++ b/libs/libdaemon/src/BaseDaemon.cpp @@ -653,7 +653,7 @@ static std::string createDirectory(const std::string & file) return ""; Poco::File(path).createDirectories(); return path.toString(); -}; +} static bool tryCreateDirectories(Poco::Logger * logger, const std::string & path) {