Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Prepare for new release 5.4.9 #131

Merged
merged 15 commits into from
Sep 19, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
62 changes: 48 additions & 14 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
cmake_minimum_required (VERSION 2.8.11)
cmake_minimum_required (VERSION 2.8.12)

project (vectorscan C CXX)

set (HS_MAJOR_VERSION 5)
Expand Down Expand Up @@ -165,27 +166,49 @@ if (CMAKE_COMPILER_IS_GNUCC AND NOT CROSS_COMPILE)
# cpuid info and then chooses the best microarch it can (and replaces
# the flag), so use that for tune.

set(TUNE_FLAG "mtune")
set(GNUCC_TUNE "")
message(STATUS "ARCH_FLAG '${ARCH_FLAG}' '${GNUCC_ARCH}', TUNE_FLAG '${TUNE_FLAG}' '${GNUCC_TUNE}' ")

# arg1 might exist if using ccache
string (STRIP "${CMAKE_C_COMPILER_ARG1}" CC_ARG1)
set (EXEC_ARGS ${CC_ARG1} -c -Q --help=target -${ARCH_FLAG}=native -mtune=native)
set (EXEC_ARGS ${CC_ARG1} -c -Q --help=target -${ARCH_FLAG}=native -${TUNE_FLAG}=native)
execute_process(COMMAND ${CMAKE_C_COMPILER} ${EXEC_ARGS}
OUTPUT_VARIABLE _GCC_OUTPUT)
string(FIND "${_GCC_OUTPUT}" "${ARCH_FLAG}" POS)
set(_GCC_OUTPUT_TUNE ${_GCC_OUTPUT})
string(FIND "${_GCC_OUTPUT}" "${ARCH_FLAG}=" POS)
string(SUBSTRING "${_GCC_OUTPUT}" ${POS} -1 _GCC_OUTPUT)
string(REGEX REPLACE "${ARCH_FLAG}=[ \t]*([^ \n]*)[ \n].*" "\\1" GNUCC_ARCH "${_GCC_OUTPUT}")

string(FIND "${_GCC_OUTPUT_TUNE}" "${TUNE_FLAG}=" POS_TUNE)
string(SUBSTRING "${_GCC_OUTPUT_TUNE}" ${POS_TUNE} -1 _GCC_OUTPUT_TUNE)
string(REGEX REPLACE "${TUNE_FLAG}=[ \t]*([^ \n]*)[ \n].*" "\\1" GNUCC_TUNE "${_GCC_OUTPUT_TUNE}")

string(FIND "${GNUCC_ARCH}" "sve" POS_SVE)
string(FIND "${GNUCC_ARCH}" "sve2" POS_SVE2)
string(FIND "${GNUCC_ARCH}" "sve2-bitperm" POS_SVE2_BITPERM)
if (NOT POS_SVE EQUAL 0)
set(SVE_FOUND 1)
elseif(NOT POS_SVE2 EQUAL 0)
set(SVE2_FOUND 1)
elseif(NOT POS_SVE2_BITPERM EQUAL 0)
set(SVE2_BITPERM_FOUND 1)
endif()

message(STATUS "ARCH_FLAG '${ARCH_FLAG}' '${GNUCC_ARCH}', TUNE_FLAG '${TUNE_FLAG}' '${GNUCC_TUNE}' ")

# test the parsed flag
set (EXEC_ARGS ${CC_ARG1} -E - -mtune=${GNUCC_ARCH})
set (EXEC_ARGS ${CC_ARG1} -E - -${ARCH_FLAG}=${GNUCC_ARCH} -${TUNE_FLAG}=${GNUCC_TUNE})
execute_process(COMMAND ${CMAKE_C_COMPILER} ${EXEC_ARGS}
OUTPUT_QUIET ERROR_QUIET
INPUT_FILE /dev/null
RESULT_VARIABLE GNUCC_TUNE_TEST)
if (NOT GNUCC_TUNE_TEST EQUAL 0)
message(WARNING "Something went wrong determining gcc tune: -mtune=${GNUCC_ARCH} not valid, falling back to -mtune=native")
set(TUNE_FLAG native)
message(WARNING "Something went wrong determining gcc tune: -mtune=${GNUCC_TUNE} not valid, falling back to -mtune=native")
set(GNUCC_TUNE native)
else()
set(TUNE_FLAG ${GNUCC_ARCH})
message(STATUS "gcc will tune for ${GNUCC_ARCH}, ${TUNE_FLAG}")
set(GNUCC_TUNE ${GNUCC_TUNE})
message(STATUS "gcc will tune for ${GNUCC_ARCH}, ${GNUCC_TUNE}")
endif()
elseif (CMAKE_COMPILER_IS_CLANG AND NOT CROSS_COMPILE)
if (ARCH_IA32 OR ARCH_X86_64)
Expand Down Expand Up @@ -226,22 +249,26 @@ if (ARCH_IA32 OR ARCH_X86_64)
endif()

if (ARCH_AARCH64)
if (BUILD_SVE2_BITPERM)
if (BUILD_SVE2_BITPERM AND NOT SVE2_BITPERM_FOUND)
set(GNUCC_ARCH "${GNUCC_ARCH}+sve2-bitperm")
elseif (BUILD_SVE2)
elseif (BUILD_SVE2 AND NOT SVE2_FOUND)
set(GNUCC_ARCH "${GNUCC_ARCH}+sve2")
elseif (BUILD_SVE)
elseif (BUILD_SVE AND NOT SVE_FOUND)
set(GNUCC_ARCH "${GNUCC_ARCH}+sve")
endif ()
endif(ARCH_AARCH64)


message(STATUS "ARCH_C_FLAGS : ${ARCH_C_FLAGS}")
message(STATUS "ARCH_CXX_FLAGS : ${ARCH_CXX_FLAGS}")

if (NOT FAT_RUNTIME)
set(ARCH_C_FLAGS "-${ARCH_FLAG}=${GNUCC_ARCH} -mtune=${TUNE_FLAG} ${ARCH_C_FLAGS}")
set(ARCH_CXX_FLAGS "-${ARCH_FLAG}=${GNUCC_ARCH} -mtune=${TUNE_FLAG} ${ARCH_CXX_FLAGS}")
if (GNUCC_TUNE)
set(ARCH_C_FLAGS "-${ARCH_FLAG}=${GNUCC_ARCH} -${TUNE_FLAG}=${GNUCC_TUNE}")
set(ARCH_CXX_FLAGS "-${ARCH_FLAG}=${GNUCC_ARCH} -${TUNE_FLAG}=${GNUCC_TUNE}")
else()
set(ARCH_C_FLAGS "-${ARCH_FLAG}=${GNUCC_ARCH} -mtune=${TUNE_FLAG} ${ARCH_C_FLAGS}")
set(ARCH_CXX_FLAGS "-${ARCH_FLAG}=${GNUCC_ARCH} -mtune=${TUNE_FLAG} ${ARCH_CXX_FLAGS}")
endif()
endif()

#if (ARCH_IA32 OR ARCH_X86_64 OR ARCH_ARM32 OR ARCH_AARCH64)
Expand Down Expand Up @@ -296,6 +323,12 @@ if (NOT RELEASE_BUILD)
# release builds
set(EXTRA_C_FLAGS "${EXTRA_C_FLAGS} -Werror")
set(EXTRA_CXX_FLAGS "${EXTRA_CXX_FLAGS} -Werror")
if (CMAKE_COMPILER_IS_CLANG)
if (CMAKE_C_COMPILER_VERSION VERSION_GREATER "13.0")
set(EXTRA_C_FLAGS "${EXTRA_C_FLAGS} -Wno-unused-but-set-variable")
set(EXTRA_CXX_FLAGS "${EXTRA_CXX_FLAGS} -Wno-unused-but-set-variable")
endif()
endif()
endif()

if (DISABLE_ASSERTS)
Expand Down Expand Up @@ -332,6 +365,7 @@ if (ARCH_IA32 OR ARCH_X86_64)
elseif (ARCH_ARM32 OR ARCH_AARCH64)
CHECK_INCLUDE_FILE_CXX(arm_neon.h HAVE_C_ARM_NEON_H)
if (BUILD_SVE OR BUILD_SVE2 OR BUILD_SVE2_BITPERM)
set(CMAKE_REQUIRED_FLAGS ${ARCH_CXX_FLAGS})
CHECK_INCLUDE_FILE_CXX(arm_sve.h HAVE_C_ARM_SVE_H)
if (NOT HAVE_C_ARM_SVE_H)
message(FATAL_ERROR "arm_sve.h is required to build for SVE.")
Expand Down
2 changes: 1 addition & 1 deletion src/nfa/mcclellancompile.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1484,12 +1484,12 @@ bytecode_ptr<NFA> mcclellanCompile_i(raw_dfa &raw, accel_dfa_build_strat &strat,
find_wide_state(info);
}

u16 total_daddy = 0;
bool any_cyclic_near_anchored_state
= is_cyclic_near(raw, raw.start_anchored);

// Sherman optimization
if (info.impl_alpha_size > 16) {
u16 total_daddy = 0;
for (u32 i = 0; i < info.size(); i++) {
if (info.is_widestate(i)) {
continue;
Expand Down
3 changes: 1 addition & 2 deletions src/nfagraph/ng_misc_opt.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -385,8 +385,7 @@ bool improveGraph(NGHolder &g, som_type som) {

const vector<NFAVertex> ordering = getTopoOrdering(g);

return enlargeCyclicCR(g, som, ordering)
| enlargeCyclicCR_rev(g, ordering);
return enlargeCyclicCR(g, som, ordering) || enlargeCyclicCR_rev(g, ordering);
}

/** finds a smaller reachability for a state by the reverse transformation of
Expand Down
4 changes: 2 additions & 2 deletions src/rose/rose_build_add.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -216,9 +216,9 @@ RoseRoleHistory selectHistory(const RoseBuildImpl &tbi, const RoseBuildData &bd,
const bool fixed_offset_src = g[u].fixedOffset();
const bool has_bounds = g[e].minBound || (g[e].maxBound != ROSE_BOUND_INF);

DEBUG_PRINTF("edge %zu->%zu, bounds=[%u,%u], fixed_u=%d, prefix=%d\n",
/*DEBUG_PRINTF("edge %zu->%zu, bounds=[%u,%u], fixed_u=%d, prefix=%d\n",
g[u].index, g[v].index, g[e].minBound, g[e].maxBound,
(int)g[u].fixedOffset(), (int)g[v].left);
(int)g[u].fixedOffset(), (int)g[v].left);*/

if (g[v].left) {
// Roles with prefix engines have their history handled by that prefix.
Expand Down
34 changes: 17 additions & 17 deletions src/util/arch/ppc64el/simd_utils.h
Original file line number Diff line number Diff line change
Expand Up @@ -152,7 +152,7 @@ static really_inline u32 movemask128(m128 a) {
static uint8x16_t perm = { 16, 24, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 };
uint8x16_t bitmask = vec_gb((uint8x16_t) a);
bitmask = (uint8x16_t) vec_perm(vec_splat_u8(0), bitmask, perm);
u32 movemask;
u32 ALIGN_ATTR(16) movemask;
vec_ste((uint32x4_t) bitmask, 0, &movemask);
return movemask;
}
Expand Down Expand Up @@ -285,27 +285,27 @@ m128 loadbytes128(const void *ptr, unsigned int n) {
return a;
}

#define CASE_ALIGN_VECTORS(a, b, offset) case offset: return (m128)vec_sld((int8x16_t)(b), (int8x16_t)(a), (16 - offset)); break;
#define CASE_ALIGN_VECTORS(a, b, offset) case offset: return (m128)vec_sld((int8x16_t)(a), (int8x16_t)(b), (16 - offset)); break;

static really_really_inline
m128 palignr_imm(m128 r, m128 l, int offset) {
switch (offset) {
case 0: return l; break;
CASE_ALIGN_VECTORS(l, r, 1);
CASE_ALIGN_VECTORS(l, r, 2);
CASE_ALIGN_VECTORS(l, r, 3);
CASE_ALIGN_VECTORS(l, r, 4);
CASE_ALIGN_VECTORS(l, r, 5);
CASE_ALIGN_VECTORS(l, r, 6);
CASE_ALIGN_VECTORS(l, r, 7);
CASE_ALIGN_VECTORS(l, r, 8);
CASE_ALIGN_VECTORS(l, r, 9);
CASE_ALIGN_VECTORS(l, r, 10);
CASE_ALIGN_VECTORS(l, r, 11);
CASE_ALIGN_VECTORS(l, r, 12);
CASE_ALIGN_VECTORS(l, r, 13);
CASE_ALIGN_VECTORS(l, r, 14);
CASE_ALIGN_VECTORS(l, r, 15);
CASE_ALIGN_VECTORS(r, l, 1);
CASE_ALIGN_VECTORS(r, l, 2);
CASE_ALIGN_VECTORS(r, l, 3);
CASE_ALIGN_VECTORS(r, l, 4);
CASE_ALIGN_VECTORS(r, l, 5);
CASE_ALIGN_VECTORS(r, l, 6);
CASE_ALIGN_VECTORS(r, l, 7);
CASE_ALIGN_VECTORS(r, l, 8);
CASE_ALIGN_VECTORS(r, l, 9);
CASE_ALIGN_VECTORS(r, l, 10);
CASE_ALIGN_VECTORS(r, l, 11);
CASE_ALIGN_VECTORS(r, l, 12);
CASE_ALIGN_VECTORS(r, l, 13);
CASE_ALIGN_VECTORS(r, l, 14);
CASE_ALIGN_VECTORS(r, l, 15);
case 16: return r; break;
default: return zeroes128(); break;
}
Expand Down
6 changes: 3 additions & 3 deletions src/util/supervector/arch/ppc64el/impl.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,7 @@ really_inline SuperVector<16>::SuperVector(SuperVector const &other)

template<>
template<>
really_inline SuperVector<16>::SuperVector(char __bool __vector v)
really_inline SuperVector<16>::SuperVector(__vector __bool char v)
{
u.u8x16[0] = (uint8x16_t) v;
};
Expand Down Expand Up @@ -269,10 +269,10 @@ really_inline SuperVector<16> SuperVector<16>::eq(SuperVector<16> const &b) cons
template <>
really_inline typename SuperVector<16>::comparemask_type
SuperVector<16>::comparemask(void) const {
uint8x16_t bitmask = vec_gb( u.u8x16[0]);
static uint8x16_t perm = { 16, 24, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 };
uint8x16_t bitmask = vec_gb(u.u8x16[0]);
bitmask = (uint8x16_t) vec_perm(vec_splat_u8(0), bitmask, perm);
u32 movemask;
u32 ALIGN_ATTR(16) movemask;
vec_ste((uint32x4_t) bitmask, 0, &movemask);
return movemask;
}
Expand Down
2 changes: 0 additions & 2 deletions src/util/supervector/arch/x86/impl.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -523,9 +523,7 @@ template <>
really_inline SuperVector<16> SuperVector<16>::loadu_maskz(void const *ptr, uint8_t const len)
{
SuperVector mask = Ones_vshr(16 -len);
mask.print8("mask");
SuperVector v = _mm_loadu_si128((const m128 *)ptr);
v.print8("v");
return mask & v;
}

Expand Down