From 68db36f4c471a1336b379a11b3f280b417551cb1 Mon Sep 17 00:00:00 2001 From: Konstantinos Margaritis Date: Wed, 23 Aug 2023 09:42:00 +0000 Subject: [PATCH 1/2] initial attempt for fat binary on Aarch64 --- CMakeLists.txt | 363 +++++++++++++++++++------------ cmake/arch.cmake | 40 ++-- src/dispatcher.c | 36 ++- src/hs.cpp | 9 +- src/hs_valid_platform.c | 8 +- src/util/arch/arm/cpuid_inline.h | 61 ++++++ 6 files changed, 353 insertions(+), 164 deletions(-) create mode 100644 src/util/arch/arm/cpuid_inline.h diff --git a/CMakeLists.txt b/CMakeLists.txt index e90d8c98d..bc4c98466 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -249,13 +249,18 @@ if (ARCH_IA32 OR ARCH_X86_64) endif() if (ARCH_AARCH64) - if (BUILD_SVE2_BITPERM AND NOT SVE2_BITPERM_FOUND) - set(GNUCC_ARCH "${GNUCC_ARCH}+sve2-bitperm") - elseif (BUILD_SVE2 AND NOT SVE2_FOUND) - set(GNUCC_ARCH "${GNUCC_ARCH}+sve2") - elseif (BUILD_SVE AND NOT SVE_FOUND) - set(GNUCC_ARCH "${GNUCC_ARCH}+sve") - endif () + if (NOT FAT_RUNTIME) + if (BUILD_SVE2_BITPERM AND NOT SVE2_BITPERM_FOUND) + set(GNUCC_ARCH "${GNUCC_ARCH}+sve2-bitperm") + elseif (BUILD_SVE2 AND NOT SVE2_FOUND) + set(GNUCC_ARCH "${GNUCC_ARCH}+sve2") + elseif (BUILD_SVE AND NOT SVE_FOUND) + set(GNUCC_ARCH "${GNUCC_ARCH}+sve") + endif () + else() + set(ARCH_C_FLAGS "") + set(ARCH_CXX_FLAGS "") + endif() endif(ARCH_AARCH64) message(STATUS "ARCH_C_FLAGS : ${ARCH_C_FLAGS}") @@ -271,24 +276,6 @@ if (NOT FAT_RUNTIME) endif() endif() -#if (ARCH_IA32 OR ARCH_X86_64 OR ARCH_ARM32 OR ARCH_AARCH64) -# if (NOT CMAKE_C_FLAGS MATCHES .*march.* AND NOT CMAKE_C_FLAGS MATCHES .*mtune.*) -# set(ARCH_C_FLAGS "-march=${GNUCC_ARCH} -mtune=${TUNE_FLAG}") -# endif() -# if (NOT CMAKE_CXX_FLAGS MATCHES .*march.* AND NOT CMAKE_CXX_FLAGS MATCHES .*mtune.*) -# set(ARCH_CXX_FLAGS "-march=${GNUCC_ARCH} -mtune=${TUNE_FLAG}") -# endif() -#endif() - -#if(ARCH_PPC64EL) -# if (NOT CMAKE_C_FLAGS MATCHES .*march.* AND NOT CMAKE_C_FLAGS MATCHES .*mtune.*) -# set(ARCH_C_FLAGS "-mtune=${TUNE_FLAG}") -# endif() -# if (NOT CMAKE_CXX_FLAGS MATCHES .*march.* AND NOT CMAKE_CXX_FLAGS MATCHES .*mtune.*) -# set(ARCH_CXX_FLAGS "-mtune=${TUNE_FLAG}") -# endif() -#endif() - # compiler version checks TODO: test more compilers if (CMAKE_COMPILER_IS_GNUCXX) set(GNUCXX_MINVER "9") @@ -396,6 +383,7 @@ endif() option(FAT_RUNTIME "Build a library that supports multiple microarchitectures" ON) if (CMAKE_SYSTEM_NAME MATCHES "Linux" AND FAT_RUNTIME MATCHES "ON") + message("Fat Runtime for ${GNUCC_ARCH}") # This is a Linux-only feature for now - requires platform support # elsewhere message(STATUS "generator is ${CMAKE_GENERATOR}") @@ -529,8 +517,8 @@ endif(CMAKE_SYSTEM_NAME MATCHES "FreeBSD") if (FAT_RUNTIME) - if (NOT (ARCH_IA32 OR ARCH_X86_64)) - message(FATAL_ERROR "Fat runtime is not supported on non-Intel architectures") + if (NOT (ARCH_IA32 OR ARCH_X86_64 OR ARCH_AARCH64)) + message(FATAL_ERROR "Fat runtime is only supported on Intel and Aarch64 architectures") else() message(STATUS "Building runtime for multiple microarchitectures") endif() @@ -790,7 +778,7 @@ set (hs_exec_SRCS endif () endif() -if (NOT BUILD_SVE2) +if (FAT_RUNTIME OR (NOT FAT_RUNTIME AND NOT BUILD_SVE2)) set (hs_exec_SRCS ${hs_exec_SRCS} src/nfa/vermicelli_simd.cpp) @@ -1273,137 +1261,222 @@ if (NOT FAT_RUNTIME) add_library(hs_compile_shared OBJECT ${hs_compile_SRCS}) set_target_properties(hs_compile_shared PROPERTIES POSITION_INDEPENDENT_CODE TRUE) endif() +else () + if (ARCH_IA32 OR ARCH_X86_64) + set(BUILD_WRAPPER "${PROJECT_SOURCE_DIR}/cmake/build_wrapper.sh") + if (NOT BUILD_AVX512) + set (DISPATCHER_DEFINE "-DDISABLE_AVX512_DISPATCH") + endif (NOT BUILD_AVX512) + if (NOT BUILD_AVX512VBMI) + set (DISPATCHER_DEFINE "${DISPATCHER_DEFINE} -DDISABLE_AVX512VBMI_DISPATCH") + endif (NOT BUILD_AVX512VBMI) + set_source_files_properties(src/dispatcher.c PROPERTIES + COMPILE_FLAGS "-Wno-unused-parameter -Wno-unused-function ${DISPATCHER_DEFINE}") + + if (BUILD_STATIC_LIBS) + add_library(hs_exec_core2 OBJECT ${hs_exec_SRCS}) + list(APPEND RUNTIME_LIBS $) + set_target_properties(hs_exec_core2 PROPERTIES + COMPILE_FLAGS "-march=core2 -msse4.2" + RULE_LAUNCH_COMPILE "${BUILD_WRAPPER} core2 ${CMAKE_MODULE_PATH}/keep.syms.in" + ) -else (FAT_RUNTIME) - - set(BUILD_WRAPPER "${PROJECT_SOURCE_DIR}/cmake/build_wrapper.sh") - if (NOT BUILD_AVX512) - set (DISPATCHER_DEFINE "-DDISABLE_AVX512_DISPATCH") - endif (NOT BUILD_AVX512) - if (NOT BUILD_AVX512VBMI) - set (DISPATCHER_DEFINE "${DISPATCHER_DEFINE} -DDISABLE_AVX512VBMI_DISPATCH") - endif (NOT BUILD_AVX512VBMI) - set_source_files_properties(src/dispatcher.c PROPERTIES - COMPILE_FLAGS "-Wno-unused-parameter -Wno-unused-function ${DISPATCHER_DEFINE}") - - if (BUILD_STATIC_LIBS) - add_library(hs_exec_core2 OBJECT ${hs_exec_SRCS}) - list(APPEND RUNTIME_LIBS $) - set_target_properties(hs_exec_core2 PROPERTIES - COMPILE_FLAGS "-march=core2 -msse4.2" - RULE_LAUNCH_COMPILE "${BUILD_WRAPPER} core2 ${CMAKE_MODULE_PATH}/keep.syms.in" - ) - - add_library(hs_exec_corei7 OBJECT ${hs_exec_SRCS}) - list(APPEND RUNTIME_LIBS $) - set_target_properties(hs_exec_corei7 PROPERTIES - COMPILE_FLAGS "-march=corei7 -msse4.2" - RULE_LAUNCH_COMPILE "${BUILD_WRAPPER} corei7 ${CMAKE_MODULE_PATH}/keep.syms.in" - ) + add_library(hs_exec_corei7 OBJECT ${hs_exec_SRCS}) + list(APPEND RUNTIME_LIBS $) + set_target_properties(hs_exec_corei7 PROPERTIES + COMPILE_FLAGS "-march=corei7 -msse4.2" + RULE_LAUNCH_COMPILE "${BUILD_WRAPPER} corei7 ${CMAKE_MODULE_PATH}/keep.syms.in" + ) - if (BUILD_AVX2) - add_library(hs_exec_avx2 OBJECT ${hs_exec_SRCS} ${hs_exec_avx2_SRCS}) - list(APPEND RUNTIME_LIBS $) - set_target_properties(hs_exec_avx2 PROPERTIES - COMPILE_FLAGS "-march=core-avx2 -mavx2" - RULE_LAUNCH_COMPILE "${BUILD_WRAPPER} avx2 ${CMAKE_MODULE_PATH}/keep.syms.in" + if (BUILD_AVX2) + add_library(hs_exec_avx2 OBJECT ${hs_exec_SRCS} ${hs_exec_avx2_SRCS}) + list(APPEND RUNTIME_LIBS $) + set_target_properties(hs_exec_avx2 PROPERTIES + COMPILE_FLAGS "-march=core-avx2 -mavx2" + RULE_LAUNCH_COMPILE "${BUILD_WRAPPER} avx2 ${CMAKE_MODULE_PATH}/keep.syms.in" + ) + endif (BUILD_AVX2) + if (BUILD_AVX512) + add_library(hs_exec_avx512 OBJECT ${hs_exec_SRCS} ${hs_exec_avx2_SRCS}) + list(APPEND RUNTIME_LIBS $) + set_target_properties(hs_exec_avx512 PROPERTIES + COMPILE_FLAGS "${SKYLAKE_FLAG}" + RULE_LAUNCH_COMPILE "${BUILD_WRAPPER} avx512 ${CMAKE_MODULE_PATH}/keep.syms.in" + ) + endif (BUILD_AVX512) + if (BUILD_AVX512VBMI) + add_library(hs_exec_avx512vbmi OBJECT ${hs_exec_SRCS} ${hs_exec_avx2_SRCS}) + list(APPEND RUNTIME_LIBS $) + set_target_properties(hs_exec_avx512vbmi PROPERTIES + COMPILE_FLAGS "${ICELAKE_FLAG}" + RULE_LAUNCH_COMPILE "${BUILD_WRAPPER} avx512vbmi ${CMAKE_MODULE_PATH}/keep.syms.in" + ) + endif (BUILD_AVX512VBMI) + + add_library(hs_exec_common OBJECT + ${hs_exec_common_SRCS} + src/dispatcher.c ) - endif (BUILD_AVX2) - if (BUILD_AVX512) - add_library(hs_exec_avx512 OBJECT ${hs_exec_SRCS} ${hs_exec_avx2_SRCS}) - list(APPEND RUNTIME_LIBS $) - set_target_properties(hs_exec_avx512 PROPERTIES - COMPILE_FLAGS "${SKYLAKE_FLAG}" - RULE_LAUNCH_COMPILE "${BUILD_WRAPPER} avx512 ${CMAKE_MODULE_PATH}/keep.syms.in" + + # hs_version.c is added explicitly to avoid some build systems that refuse to + # create a lib without any src (I'm looking at you Xcode) + + add_library(hs_runtime STATIC src/hs_version.c + $ + ${RUNTIME_LIBS}) + set_target_properties(hs_runtime PROPERTIES LINKER_LANGUAGE C) + add_library(hs_compile OBJECT ${hs_compile_SRCS}) + + # we want the static lib for testing + add_library(hs STATIC src/hs_version.c src/hs_valid_platform.c + $ + $ + ${RUNTIME_LIBS}) + endif (BUILD_STATIC_LIBS) + + if (BUILD_STATIC_AND_SHARED OR BUILD_SHARED_LIBS) + # build shared libs + add_library(hs_compile_shared OBJECT ${hs_compile_SRCS}) + set_target_properties(hs_compile_shared PROPERTIES POSITION_INDEPENDENT_CODE TRUE) + add_library(hs_exec_shared_core2 OBJECT ${hs_exec_SRCS}) + list(APPEND RUNTIME_SHLIBS $) + set_target_properties(hs_exec_shared_core2 PROPERTIES + COMPILE_FLAGS "-march=core2 -msse4.2" + POSITION_INDEPENDENT_CODE TRUE + RULE_LAUNCH_COMPILE "${BUILD_WRAPPER} core2 ${CMAKE_MODULE_PATH}/keep.syms.in" ) - endif (BUILD_AVX512) - if (BUILD_AVX512VBMI) - add_library(hs_exec_avx512vbmi OBJECT ${hs_exec_SRCS} ${hs_exec_avx2_SRCS}) - list(APPEND RUNTIME_LIBS $) - set_target_properties(hs_exec_avx512vbmi PROPERTIES - COMPILE_FLAGS "${ICELAKE_FLAG}" - RULE_LAUNCH_COMPILE "${BUILD_WRAPPER} avx512vbmi ${CMAKE_MODULE_PATH}/keep.syms.in" + add_library(hs_exec_shared_corei7 OBJECT ${hs_exec_SRCS}) + list(APPEND RUNTIME_SHLIBS $) + set_target_properties(hs_exec_shared_corei7 PROPERTIES + COMPILE_FLAGS "-march=corei7 -msse4.2" + POSITION_INDEPENDENT_CODE TRUE + RULE_LAUNCH_COMPILE "${BUILD_WRAPPER} corei7 ${CMAKE_MODULE_PATH}/keep.syms.in" ) - endif (BUILD_AVX512VBMI) - add_library(hs_exec_common OBJECT + if (BUILD_AVX2) + add_library(hs_exec_shared_avx2 OBJECT ${hs_exec_SRCS} ${hs_exec_avx2_SRCS}) + list(APPEND RUNTIME_SHLIBS $) + set_target_properties(hs_exec_shared_avx2 PROPERTIES + COMPILE_FLAGS "-march=core-avx2 -mavx2" + POSITION_INDEPENDENT_CODE TRUE + RULE_LAUNCH_COMPILE "${BUILD_WRAPPER} avx2 ${CMAKE_MODULE_PATH}/keep.syms.in" + ) + endif (BUILD_AVX2) + if (BUILD_AVX512) + add_library(hs_exec_shared_avx512 OBJECT ${hs_exec_SRCS} ${hs_exec_avx2_SRCS}) + list(APPEND RUNTIME_SHLIBS $) + set_target_properties(hs_exec_shared_avx512 PROPERTIES + COMPILE_FLAGS "${SKYLAKE_FLAG}" + POSITION_INDEPENDENT_CODE TRUE + RULE_LAUNCH_COMPILE "${BUILD_WRAPPER} avx512 ${CMAKE_MODULE_PATH}/keep.syms.in" + ) + endif (BUILD_AVX512) + if (BUILD_AVX512VBMI) + add_library(hs_exec_shared_avx512vbmi OBJECT ${hs_exec_SRCS} ${hs_exec_avx2_SRCS}) + list(APPEND RUNTIME_SHLIBS $) + set_target_properties(hs_exec_shared_avx512vbmi PROPERTIES + COMPILE_FLAGS "${ICELAKE_FLAG}" + POSITION_INDEPENDENT_CODE TRUE + RULE_LAUNCH_COMPILE "${BUILD_WRAPPER} avx512vbmi ${CMAKE_MODULE_PATH}/keep.syms.in" + ) + endif (BUILD_AVX512VBMI) + add_library(hs_exec_common_shared OBJECT ${hs_exec_common_SRCS} src/dispatcher.c ) - - # hs_version.c is added explicitly to avoid some build systems that refuse to - # create a lib without any src (I'm looking at you Xcode) - - add_library(hs_runtime STATIC src/hs_version.c - $ - ${RUNTIME_LIBS}) - set_target_properties(hs_runtime PROPERTIES LINKER_LANGUAGE C) - add_library(hs_compile OBJECT ${hs_compile_SRCS}) - - # we want the static lib for testing - add_library(hs STATIC src/hs_version.c src/hs_valid_platform.c - $ - $ - ${RUNTIME_LIBS}) - - endif (BUILD_STATIC_LIBS) - - if (BUILD_STATIC_AND_SHARED OR BUILD_SHARED_LIBS) - # build shared libs - add_library(hs_compile_shared OBJECT ${hs_compile_SRCS}) - set_target_properties(hs_compile_shared PROPERTIES POSITION_INDEPENDENT_CODE TRUE) - add_library(hs_exec_shared_core2 OBJECT ${hs_exec_SRCS}) - list(APPEND RUNTIME_SHLIBS $) - set_target_properties(hs_exec_shared_core2 PROPERTIES - COMPILE_FLAGS "-march=core2 -msse4.2" - POSITION_INDEPENDENT_CODE TRUE - RULE_LAUNCH_COMPILE "${BUILD_WRAPPER} core2 ${CMAKE_MODULE_PATH}/keep.syms.in" - ) - add_library(hs_exec_shared_corei7 OBJECT ${hs_exec_SRCS}) - list(APPEND RUNTIME_SHLIBS $) - set_target_properties(hs_exec_shared_corei7 PROPERTIES - COMPILE_FLAGS "-march=corei7 -msse4.2" - POSITION_INDEPENDENT_CODE TRUE - RULE_LAUNCH_COMPILE "${BUILD_WRAPPER} corei7 ${CMAKE_MODULE_PATH}/keep.syms.in" - ) - - if (BUILD_AVX2) - add_library(hs_exec_shared_avx2 OBJECT ${hs_exec_SRCS} ${hs_exec_avx2_SRCS}) - list(APPEND RUNTIME_SHLIBS $) - set_target_properties(hs_exec_shared_avx2 PROPERTIES - COMPILE_FLAGS "-march=core-avx2 -mavx2" - POSITION_INDEPENDENT_CODE TRUE - RULE_LAUNCH_COMPILE "${BUILD_WRAPPER} avx2 ${CMAKE_MODULE_PATH}/keep.syms.in" + set_target_properties(hs_exec_common_shared PROPERTIES + OUTPUT_NAME hs_exec_common + POSITION_INDEPENDENT_CODE TRUE) + + endif() # SHARED + endif (ARCH_IA32 OR ARCH_X86_64) + if (ARCH_AARCH64) + set(BUILD_WRAPPER "${PROJECT_SOURCE_DIR}/cmake/build_wrapper.sh") + if (BUILD_STATIC_LIBS) + add_library(hs_exec_neon OBJECT ${hs_exec_SRCS}) + list(APPEND RUNTIME_LIBS $) + set_target_properties(hs_exec_neon PROPERTIES + COMPILE_FLAGS "-march=armv8-a" + RULE_LAUNCH_COMPILE "${BUILD_WRAPPER} neon ${CMAKE_MODULE_PATH}/keep.syms.in" ) - endif (BUILD_AVX2) - if (BUILD_AVX512) - add_library(hs_exec_shared_avx512 OBJECT ${hs_exec_SRCS} ${hs_exec_avx2_SRCS}) - list(APPEND RUNTIME_SHLIBS $) - set_target_properties(hs_exec_shared_avx512 PROPERTIES - COMPILE_FLAGS "${SKYLAKE_FLAG}" - POSITION_INDEPENDENT_CODE TRUE - RULE_LAUNCH_COMPILE "${BUILD_WRAPPER} avx512 ${CMAKE_MODULE_PATH}/keep.syms.in" + + if (BUILD_SVE) + add_library(hs_exec_sve OBJECT ${hs_exec_SRCS} ${hs_exec_sve_SRCS}) + list(APPEND RUNTIME_LIBS $) + set_target_properties(hs_exec_sve PROPERTIES + COMPILE_FLAGS "-march=armv8-a+sve" + RULE_LAUNCH_COMPILE "${BUILD_WRAPPER} sve ${CMAKE_MODULE_PATH}/keep.syms.in" + ) + endif (BUILD_SVE) + if (BUILD_SVE2) + add_library(hs_exec_sve2 OBJECT ${hs_exec_SRCS} ${hs_exec_sve2_SRCS}) + list(APPEND RUNTIME_LIBS $) + set_target_properties(hs_exec_sve2 PROPERTIES + COMPILE_FLAGS "-march=armv8-a+sve2" + RULE_LAUNCH_COMPILE "${BUILD_WRAPPER} sve2 ${CMAKE_MODULE_PATH}/keep.syms.in" + ) + endif (BUILD_SVE2) + + add_library(hs_exec_common OBJECT + ${hs_exec_common_SRCS} + src/dispatcher.c ) - endif (BUILD_AVX512) - if (BUILD_AVX512VBMI) - add_library(hs_exec_shared_avx512vbmi OBJECT ${hs_exec_SRCS} ${hs_exec_avx2_SRCS}) - list(APPEND RUNTIME_SHLIBS $) - set_target_properties(hs_exec_shared_avx512vbmi PROPERTIES - COMPILE_FLAGS "${ICELAKE_FLAG}" + + # hs_version.c is added explicitly to avoid some build systems that refuse to + # create a lib without any src (I'm looking at you Xcode) + + add_library(hs_runtime STATIC src/hs_version.c + $ + ${RUNTIME_LIBS}) + set_target_properties(hs_runtime PROPERTIES LINKER_LANGUAGE C) + add_library(hs_compile OBJECT ${hs_compile_SRCS}) + + # we want the static lib for testing + add_library(hs STATIC src/hs_version.c src/hs_valid_platform.c + $ + $ + ${RUNTIME_LIBS}) + endif (BUILD_STATIC_LIBS) + + if (BUILD_STATIC_AND_SHARED OR BUILD_SHARED_LIBS) + # build shared libs + add_library(hs_compile_shared OBJECT ${hs_compile_SRCS}) + set_target_properties(hs_compile_shared PROPERTIES POSITION_INDEPENDENT_CODE TRUE) + add_library(hs_exec_shared_neon OBJECT ${hs_exec_SRCS}) + list(APPEND RUNTIME_SHLIBS $) + set_target_properties(hs_exec_shared_neon PROPERTIES + COMPILE_FLAGS "-march=armv8-a" POSITION_INDEPENDENT_CODE TRUE - RULE_LAUNCH_COMPILE "${BUILD_WRAPPER} avx512vbmi ${CMAKE_MODULE_PATH}/keep.syms.in" + RULE_LAUNCH_COMPILE "${BUILD_WRAPPER} neon ${CMAKE_MODULE_PATH}/keep.syms.in" ) - endif (BUILD_AVX512VBMI) - add_library(hs_exec_common_shared OBJECT - ${hs_exec_common_SRCS} - src/dispatcher.c - ) - set_target_properties(hs_exec_common_shared PROPERTIES - OUTPUT_NAME hs_exec_common - POSITION_INDEPENDENT_CODE TRUE) - endif() # SHARED - + if (BUILD_SVE) + add_library(hs_exec_shared_sve OBJECT ${hs_exec_SRCS} ${hs_exec_sve_SRCS}) + list(APPEND RUNTIME_SHLIBS $) + set_target_properties(hs_exec_shared_sve PROPERTIES + COMPILE_FLAGS "-march=armv8-a+sve" + POSITION_INDEPENDENT_CODE TRUE + RULE_LAUNCH_COMPILE "${BUILD_WRAPPER} sve ${CMAKE_MODULE_PATH}/keep.syms.in" + ) + endif (BUILD_SVE) + if (BUILD_SVE2) + add_library(hs_exec_shared_sve2 OBJECT ${hs_exec_SRCS} ${hs_exec_sve2_SRCS}) + list(APPEND RUNTIME_SHLIBS $) + set_target_properties(hs_exec_shared_sve2 PROPERTIES + COMPILE_FLAGS "-march=armv8-a+sve2" + POSITION_INDEPENDENT_CODE TRUE + RULE_LAUNCH_COMPILE "${BUILD_WRAPPER} sve2 ${CMAKE_MODULE_PATH}/keep.syms.in" + ) + endif (BUILD_SVE2) + add_library(hs_exec_common_shared OBJECT + ${hs_exec_common_SRCS} + src/dispatcher.c + ) + set_target_properties(hs_exec_common_shared PROPERTIES + OUTPUT_NAME hs_exec_common + POSITION_INDEPENDENT_CODE TRUE) + endif() # SHARED + endif (ARCH_AARCH64) endif (NOT FAT_RUNTIME) if (NOT BUILD_SHARED_LIBS) diff --git a/cmake/arch.cmake b/cmake/arch.cmake index 29c39b498..6dd183b99 100644 --- a/cmake/arch.cmake +++ b/cmake/arch.cmake @@ -8,7 +8,6 @@ elseif (HAVE_C_INTRIN_H) set (INTRIN_INC_H "intrin.h") elseif (HAVE_C_ARM_NEON_H) set (INTRIN_INC_H "arm_neon.h") - set (FAT_RUNTIME OFF) elseif (HAVE_C_PPC64EL_ALTIVEC_H) set (INTRIN_INC_H "altivec.h") set (FAT_RUNTIME OFF) @@ -77,21 +76,30 @@ if (BUILD_AVX512VBMI) endif () if (FAT_RUNTIME) - if (NOT DEFINED(BUILD_AVX2)) - set(BUILD_AVX2 TRUE) - endif () - # test the highest level microarch to make sure everything works - if (BUILD_AVX512) - if (BUILD_AVX512VBMI) - set (CMAKE_REQUIRED_FLAGS "${CMAKE_C_FLAGS} ${EXTRA_C_FLAGS} ${ICELAKE_FLAG}") - else () - set (CMAKE_REQUIRED_FLAGS "${CMAKE_C_FLAGS} ${EXTRA_C_FLAGS} ${SKYLAKE_FLAG}") - endif (BUILD_AVX512VBMI) - elseif (BUILD_AVX2) - set (CMAKE_REQUIRED_FLAGS "${CMAKE_C_FLAGS} ${EXTRA_C_FLAGS} -march=core-avx2 -mavx2") - elseif () - set (CMAKE_REQUIRED_FLAGS "${CMAKE_C_FLAGS} ${EXTRA_C_FLAGS} -march=core-i7 -mssse3") - endif () + if (ARCH_IA32 OR ARCH_X86_64) + if (NOT DEFINED(BUILD_AVX2)) + set(BUILD_AVX2 TRUE) + endif () + # test the highest level microarch to make sure everything works + if (BUILD_AVX512) + if (BUILD_AVX512VBMI) + set (CMAKE_REQUIRED_FLAGS "${CMAKE_C_FLAGS} ${EXTRA_C_FLAGS} ${ICELAKE_FLAG}") + else () + set (CMAKE_REQUIRED_FLAGS "${CMAKE_C_FLAGS} ${EXTRA_C_FLAGS} ${SKYLAKE_FLAG}") + endif (BUILD_AVX512VBMI) + elseif (BUILD_AVX2) + set (CMAKE_REQUIRED_FLAGS "${CMAKE_C_FLAGS} ${EXTRA_C_FLAGS} -march=core-avx2 -mavx2") + elseif () + set (CMAKE_REQUIRED_FLAGS "${CMAKE_C_FLAGS} ${EXTRA_C_FLAGS} -march=core-i7 -mssse3") + endif () + elseif(ARCH_AARCH64) + if (NOT DEFINED(BUILD_SVE)) + set(BUILD_SVE TRUE) + endif () + if (NOT DEFINED(BUILD_SVE2)) + set(BUILD_SVE2 TRUE) + endif () + endif() else (NOT FAT_RUNTIME) # if not fat runtime, then test given cflags set (CMAKE_REQUIRED_FLAGS "${CMAKE_C_FLAGS} ${EXTRA_C_FLAGS} ${ARCH_C_FLAGS}") diff --git a/src/dispatcher.c b/src/dispatcher.c index f5f2d2c6e..775002f6b 100644 --- a/src/dispatcher.c +++ b/src/dispatcher.c @@ -32,7 +32,6 @@ #include "ue2common.h" #if defined(ARCH_IA32) || defined(ARCH_X86_64) #include "util/arch/x86/cpuid_inline.h" -#endif #include "util/join.h" #if defined(DISABLE_AVX512_DISPATCH) @@ -83,6 +82,41 @@ HS_PUBLIC_API \ RTYPE NAME(__VA_ARGS__) __attribute__((ifunc("resolve_" #NAME))) +#elif defined(ARCH_AARCH64) +#include "util/arch/arm/cpuid_inline.h" +#include "util/join.h" + +#define CREATE_DISPATCH(RTYPE, NAME, ...) \ + /* create defns */ \ + RTYPE JOIN(sve2_, NAME)(__VA_ARGS__); \ + RTYPE JOIN(sve_, NAME)(__VA_ARGS__); \ + RTYPE JOIN(neon_, NAME)(__VA_ARGS__); \ + \ + /* error func */ \ + static inline RTYPE JOIN(error_, NAME)(__VA_ARGS__) { \ + return (RTYPE)HS_ARCH_ERROR; \ + } \ + \ + /* resolver */ \ + static RTYPE (*JOIN(resolve_, NAME)(void))(__VA_ARGS__) { \ + if (check_sve2()) { \ + return JOIN(sve2_, NAME); \ + } \ + if (check_sve()) { \ + return JOIN(sve_, NAME); \ + } \ + if (check_neon()) { \ + return JOIN(neon_, NAME); \ + } \ + /* anything else is fail */ \ + return JOIN(error_, NAME); \ + } \ + \ + /* function */ \ + HS_PUBLIC_API \ + RTYPE NAME(__VA_ARGS__) __attribute__((ifunc("resolve_" #NAME))) + +#endif CREATE_DISPATCH(hs_error_t, hs_scan, const hs_database_t *db, const char *data, unsigned length, unsigned flags, hs_scratch_t *scratch, match_event_handler onEvent, void *userCtx); diff --git a/src/hs.cpp b/src/hs.cpp index 73cc032f6..61e46148c 100644 --- a/src/hs.cpp +++ b/src/hs.cpp @@ -199,11 +199,13 @@ hs_compile_multi_int(const char *const *expressions, const unsigned *flags, } #if defined(FAT_RUNTIME) +#if defined(ARCH_IA32) || defined(ARCH_X86_64) if (!check_ssse3()) { *db = nullptr; *comp_error = generateCompileError("Unsupported architecture", -1); return HS_ARCH_ERROR; } +#endif #endif if (!checkMode(mode, comp_error)) { @@ -320,13 +322,14 @@ hs_compile_lit_multi_int(const char *const *expressions, const unsigned *flags, *comp_error = generateCompileError("Invalid parameter: elements is zero", -1); return HS_COMPILER_ERROR; } - #if defined(FAT_RUNTIME) +#if defined(ARCH_IA32) || defined(ARCH_X86_64) if (!check_ssse3()) { *db = nullptr; *comp_error = generateCompileError("Unsupported architecture", -1); return HS_ARCH_ERROR; } +#endif #endif if (!checkMode(mode, comp_error)) { @@ -500,10 +503,12 @@ hs_error_t hs_expression_info_int(const char *expression, unsigned int flags, } #if defined(FAT_RUNTIME) +#if defined(ARCH_IA32) || defined(ARCH_X86_64) if (!check_ssse3()) { *error = generateCompileError("Unsupported architecture", -1); return HS_ARCH_ERROR; } +#endif #endif if (!info) { @@ -631,9 +636,11 @@ hs_error_t HS_CDECL hs_populate_platform(hs_platform_info_t *platform) { extern "C" HS_PUBLIC_API hs_error_t HS_CDECL hs_free_compile_error(hs_compile_error_t *error) { #if defined(FAT_RUNTIME) +#if defined(ARCH_IA32) || defined(ARCH_X86_64) if (!check_ssse3()) { return HS_ARCH_ERROR; } +#endif #endif freeCompileError(error); return HS_SUCCESS; diff --git a/src/hs_valid_platform.c b/src/hs_valid_platform.c index 809deee1d..0af36b6c4 100644 --- a/src/hs_valid_platform.c +++ b/src/hs_valid_platform.c @@ -31,6 +31,8 @@ #include "ue2common.h" #if defined(ARCH_IA32) || defined(ARCH_X86_64) #include "util/arch/x86/cpuid_inline.h" +#elif defined(ARCH_AARCH64) +#include "util/arch/arm/cpuid_inline.h" #endif HS_PUBLIC_API @@ -43,7 +45,11 @@ hs_error_t HS_CDECL hs_valid_platform(void) { return HS_ARCH_ERROR; } #elif defined(ARCH_ARM32) || defined(ARCH_AARCH64) - return HS_SUCCESS; + if (check_neon()) { + return HS_SUCCESS; + } else { + return HS_ARCH_ERROR; + } #elif defined(ARCH_PPC64EL) return HS_SUCCESS; #endif diff --git a/src/util/arch/arm/cpuid_inline.h b/src/util/arch/arm/cpuid_inline.h new file mode 100644 index 000000000..1173b42cc --- /dev/null +++ b/src/util/arch/arm/cpuid_inline.h @@ -0,0 +1,61 @@ +/* + * Copyright (c) 2017-2020, Intel Corporation + * Copyright (c) 2023, VectorCamp PC + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Intel Corporation nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef AARCH64_CPUID_INLINE_H_ +#define AARCH64_CPUID_INLINE_H_ + +#include + +#include "ue2common.h" +#include "util/arch/common/cpuid_flags.h" + +static inline +int check_neon(void) { + return 1; +} + +static inline +int check_sve(void) { + unsigned long hwcap = getauxval(AT_HWCAP); + if (hwcap & HWCAP_SVE) { + return 1; + } + return 0; +} + +static inline +int check_sve2(void) { + unsigned long hwcap2 = getauxval(AT_HWCAP2); + if (hwcap2 & HWCAP2_SVE2) { + return 1; + } + return 0; +} + +#endif // AARCH64_CPUID_INLINE_H_ From 0ec7b4e77b7a2273099e3dffcf6eaa71de25dbbc Mon Sep 17 00:00:00 2001 From: Konstantinos Margaritis Date: Wed, 23 Aug 2023 10:21:02 +0000 Subject: [PATCH 2/2] fix SVE flags detection order #145 --- CMakeLists.txt | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index bc4c98466..43ce320b1 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -187,11 +187,15 @@ if (CMAKE_COMPILER_IS_GNUCC AND NOT CROSS_COMPILE) string(FIND "${GNUCC_ARCH}" "sve" POS_SVE) string(FIND "${GNUCC_ARCH}" "sve2" POS_SVE2) string(FIND "${GNUCC_ARCH}" "sve2-bitperm" POS_SVE2_BITPERM) - if (NOT POS_SVE EQUAL 0) + if(NOT POS_SVE2_BITPERM EQUAL 0) + set(SVE2_BITPERM_FOUND 1) + set(SVE2_FOUND 1) set(SVE_FOUND 1) elseif(NOT POS_SVE2 EQUAL 0) set(SVE2_FOUND 1) - elseif(NOT POS_SVE2_BITPERM EQUAL 0) + set(SVE_FOUND 1) + elseif (NOT POS_SVE EQUAL 0) + set(SVE_FOUND 1) set(SVE2_BITPERM_FOUND 1) endif()