From 2eb6bb28fb638a459cfa2f77fbb30804989449de Mon Sep 17 00:00:00 2001 From: Neil Henning Date: Tue, 2 Feb 2021 09:12:13 +0000 Subject: [PATCH] Fix the hashmap_remove flaw. Fixes #16. --- .github/workflows/cmake.yml | 11 +++- hashmap.h | 29 ++++++--- test/CMakeLists.txt | 50 +++++++++++---- test/test.c | 28 +++++++++ test/test.cpp | 28 +++++++++ test/test.inc | 0 test/test11.cpp | 36 ++++++++++- test/test_sse42.c | 28 +++++++++ test/utest.h | 118 +++++++++++++++++++----------------- 9 files changed, 246 insertions(+), 82 deletions(-) delete mode 100644 test/test.inc diff --git a/.github/workflows/cmake.yml b/.github/workflows/cmake.yml index 452b3c4..3779e2d 100644 --- a/.github/workflows/cmake.yml +++ b/.github/workflows/cmake.yml @@ -14,7 +14,6 @@ jobs: type: [Debug, RelWithDebInfo, MinSizeRel, Release] compiler: [default, clang, gcc] exclude: - - {os: "windows-latest", compiler: "clang"} - {os: "macOS-latest", compiler: "clang"} - {os: "windows-latest", compiler: "gcc"} - {os: "macOS-latest", compiler: "gcc"} @@ -44,12 +43,18 @@ jobs: working-directory: ${{github.workspace}}/build run: cmake $GITHUB_WORKSPACE/test -DCMAKE_BUILD_TYPE=${{ matrix.type }} -DCMAKE_C_COMPILER=gcc-10 -DCMAKE_CXX_COMPILER=g++-10 - - name: Configure CMake with Clang + - name: Configure CMake with Clang (Ubuntu) shell: bash - if: matrix.compiler == 'clang' + if: (matrix.compiler == 'clang') && startsWith(matrix.os, 'ubuntu') working-directory: ${{github.workspace}}/build run: cmake $GITHUB_WORKSPACE/test -DCMAKE_BUILD_TYPE=${{ matrix.type }} -DCMAKE_C_COMPILER=clang-10 -DCMAKE_CXX_COMPILER=clang++-10 + - name: Configure CMake with Clang (Windows) + shell: bash + if: (matrix.compiler == 'clang') && startsWith(matrix.os, 'windows') + working-directory: ${{github.workspace}}/build + run: cmake $GITHUB_WORKSPACE/test -DCMAKE_BUILD_TYPE=${{ matrix.type }} -T ClangCL + - name: Build working-directory: ${{github.workspace}}/build shell: bash diff --git a/hashmap.h b/hashmap.h index 4806f3d..52b3a5c 100644 --- a/hashmap.h +++ b/hashmap.h @@ -286,6 +286,7 @@ int hashmap_remove(struct hashmap_s *const m, const char *const key, return 0; } } + curr = (curr + 1) % m->table_size; } @@ -449,6 +450,7 @@ int hashmap_hash_helper(const struct hashmap_s *const m, const char *const key, const unsigned len, unsigned *const out_index) { unsigned int curr; unsigned int i; + int total_in_use; /* If full, return immediately */ if (m->size >= m->table_size) { @@ -458,15 +460,15 @@ int hashmap_hash_helper(const struct hashmap_s *const m, const char *const key, /* Find the best index */ curr = hashmap_hash_helper_int_helper(m, key, len); - /* Linear probing */ + /* First linear probe to check if we've already insert the element */ + total_in_use = 0; + for (i = 0; i < HASHMAP_MAX_CHAIN_LENGTH; i++) { - if (!m->data[curr].in_use) { - *out_index = curr; - return 1; - } + const int in_use = m->data[curr].in_use; + + total_in_use += in_use; - if (m->data[curr].in_use && - hashmap_match_helper(&m->data[curr], key, len)) { + if (in_use && hashmap_match_helper(&m->data[curr], key, len)) { *out_index = curr; return 1; } @@ -474,6 +476,19 @@ int hashmap_hash_helper(const struct hashmap_s *const m, const char *const key, curr = (curr + 1) % m->table_size; } + /* Second linear probe to actually insert our element (only if there was at + * least one empty entry) */ + if (HASHMAP_MAX_CHAIN_LENGTH > total_in_use) { + for (i = 0; i < HASHMAP_MAX_CHAIN_LENGTH; i++) { + if (!m->data[curr].in_use) { + *out_index = curr; + return 1; + } + + curr = (curr + 1) % m->table_size; + } + } + return 0; } diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt index 13fe4ce..bfa20d3 100644 --- a/test/CMakeLists.txt +++ b/test/CMakeLists.txt @@ -24,7 +24,7 @@ # For more information, please refer to project(hashmap) -cmake_minimum_required(VERSION 3.1.3) +cmake_minimum_required(VERSION 3.15) include_directories(${CMAKE_CURRENT_SOURCE_DIR}/../) @@ -33,9 +33,15 @@ if("${CMAKE_C_COMPILER_ID}" STREQUAL "GNU") COMPILE_FLAGS "-Wall -Wextra -Werror -std=gnu89" ) elseif("${CMAKE_C_COMPILER_ID}" STREQUAL "Clang") - set_source_files_properties(main.c test.c PROPERTIES - COMPILE_FLAGS "-Wall -Wextra -Weverything -Werror -std=gnu89" - ) + if(CMAKE_CXX_COMPILER_FRONTEND_VARIANT STREQUAL "MSVC") + set_source_files_properties(main.c test.c PROPERTIES + COMPILE_FLAGS "/Wall /WX /wd4514 /wd5045" + ) + else() + set_source_files_properties(main.c test.c PROPERTIES + COMPILE_FLAGS "-Wall -Wextra -Weverything -Werror -std=gnu89" + ) + endif() elseif("${CMAKE_C_COMPILER_ID}" STREQUAL "MSVC") set_source_files_properties(main.c test.c PROPERTIES COMPILE_FLAGS "/Wall /WX /wd4514 /wd5045" @@ -49,9 +55,15 @@ if("${CMAKE_CXX_COMPILER_ID}" STREQUAL "GNU") COMPILE_FLAGS "-Wall -Wextra -Werror -std=gnu++98" ) elseif("${CMAKE_CXX_COMPILER_ID}" STREQUAL "Clang") - set_source_files_properties(test.cpp PROPERTIES - COMPILE_FLAGS "-Wall -Wextra -Weverything -Werror -std=gnu++98" - ) + if(CMAKE_CXX_COMPILER_FRONTEND_VARIANT STREQUAL "MSVC") + set_source_files_properties(test.cpp PROPERTIES + COMPILE_FLAGS "/Wall /WX /wd4514 /wd5045" + ) + else() + set_source_files_properties(test.cpp PROPERTIES + COMPILE_FLAGS "-Wall -Wextra -Weverything -Werror -std=gnu++98" + ) + endif() elseif("${CMAKE_CXX_COMPILER_ID}" STREQUAL "MSVC") set_source_files_properties(test.cpp PROPERTIES COMPILE_FLAGS "/Wall /WX /wd4514 /wd5045" @@ -65,9 +77,15 @@ if("${CMAKE_CXX_COMPILER_ID}" STREQUAL "GNU") COMPILE_FLAGS "-Wall -Wextra -Werror -std=c++11" ) elseif("${CMAKE_CXX_COMPILER_ID}" STREQUAL "Clang") - set_source_files_properties(test11.cpp PROPERTIES - COMPILE_FLAGS "-Wall -Wextra -Weverything -Werror -std=c++11 -Wno-c++98-compat" - ) + if(CMAKE_CXX_COMPILER_FRONTEND_VARIANT STREQUAL "MSVC") + set_source_files_properties(test11.cpp PROPERTIES + COMPILE_FLAGS "/Wall /WX /wd4514 /wd5045" + ) + else() + set_source_files_properties(test11.cpp PROPERTIES + COMPILE_FLAGS "-Wall -Wextra -Weverything -Werror -std=c++11" + ) + endif() elseif("${CMAKE_CXX_COMPILER_ID}" STREQUAL "MSVC") set_source_files_properties(test11.cpp PROPERTIES COMPILE_FLAGS "/Wall /WX /wd4514 /wd5045" @@ -82,9 +100,15 @@ if (NOT "${CMAKE_SYSTEM_PROCESSOR}" STREQUAL "arm64") COMPILE_FLAGS "-Wall -Wextra -Werror -std=gnu89 -msse4.2" ) elseif("${CMAKE_C_COMPILER_ID}" STREQUAL "Clang") - set_source_files_properties(test_sse42.c PROPERTIES - COMPILE_FLAGS "-Wall -Wextra -Weverything -Werror -std=gnu89 -msse4.2" - ) + if(CMAKE_CXX_COMPILER_FRONTEND_VARIANT STREQUAL "MSVC") + set_source_files_properties(test_sse42.c PROPERTIES + COMPILE_FLAGS "/Wall /WX /wd4514 /wd5045 /arch:AVX" + ) + else() + set_source_files_properties(test_sse42.c PROPERTIES + COMPILE_FLAGS "-Wall -Wextra -Weverything -Werror -std=gnu89 -msse4.2" + ) + endif() elseif("${CMAKE_C_COMPILER_ID}" STREQUAL "MSVC") set_source_files_properties(test_sse42.c PROPERTIES COMPILE_FLAGS "/Wall /WX /wd4514 /wd5045 /arch:AVX" diff --git a/test/test.c b/test/test.c index 9aa7968..4aeea6a 100644 --- a/test/test.c +++ b/test/test.c @@ -191,3 +191,31 @@ unsigned crc32_scalar(const char *const s, const unsigned len); unsigned crc32_scalar(const char *const s, const unsigned len) { return hashmap_crc32_helper(s, len); } + +UTEST(c, hash_conflict) { + struct hashmap_s hashmap; + + int x = 42; + int y = 13; + int z = -53; + + ASSERT_EQ(0, hashmap_create(4, &hashmap)); + + // These all hash to the same value. + ASSERT_EQ(0, hashmap_put(&hashmap, "000", 3, &x)); + ASSERT_EQ(0, hashmap_put(&hashmap, "002", 3, &y)); + ASSERT_EQ(0, hashmap_put(&hashmap, "007", 3, &z)); + ASSERT_EQ(3u, hashmap_num_entries(&hashmap)); + + // Now we remove the middle value. + ASSERT_EQ(0, hashmap_remove(&hashmap, "002", 3)); + ASSERT_EQ(2u, hashmap_num_entries(&hashmap)); + + // And now attempt to insert the last value again. There was a bug where this + // would insert a new entry incorrectly instead of resolving to the previous + // entry. + ASSERT_EQ(0, hashmap_put(&hashmap, "007", 3, &z)); + ASSERT_EQ(2u, hashmap_num_entries(&hashmap)); + + hashmap_destroy(&hashmap); +} diff --git a/test/test.cpp b/test/test.cpp index 117adc4..3c40439 100644 --- a/test/test.cpp +++ b/test/test.cpp @@ -158,3 +158,31 @@ UTEST(cpp, num_entries) { ASSERT_EQ(0u, hashmap_num_entries(&hashmap)); hashmap_destroy(&hashmap); } + +UTEST(cpp, hash_conflict) { + struct hashmap_s hashmap; + + int x = 42; + int y = 13; + int z = -53; + + ASSERT_EQ(0, hashmap_create(4, &hashmap)); + + // These all hash to the same value. + ASSERT_EQ(0, hashmap_put(&hashmap, "000", 3, &x)); + ASSERT_EQ(0, hashmap_put(&hashmap, "002", 3, &y)); + ASSERT_EQ(0, hashmap_put(&hashmap, "007", 3, &z)); + ASSERT_EQ(3u, hashmap_num_entries(&hashmap)); + + // Now we remove the middle value. + ASSERT_EQ(0, hashmap_remove(&hashmap, "002", 3)); + ASSERT_EQ(2u, hashmap_num_entries(&hashmap)); + + // And now attempt to insert the last value again. There was a bug where this + // would insert a new entry incorrectly instead of resolving to the previous + // entry. + ASSERT_EQ(0, hashmap_put(&hashmap, "007", 3, &z)); + ASSERT_EQ(2u, hashmap_num_entries(&hashmap)); + + hashmap_destroy(&hashmap); +} diff --git a/test/test.inc b/test/test.inc deleted file mode 100644 index e69de29..0000000 diff --git a/test/test11.cpp b/test/test11.cpp index f11f6bd..91dc709 100644 --- a/test/test11.cpp +++ b/test/test11.cpp @@ -1,8 +1,7 @@ #include "hashmap.h" #include "utest.h" -// Old MSVC doesn't support noexcept -#if defined(_MSC_VER) && (_MSC_VER < 1900) +#if defined(_MSC_VER) && (_MSC_VER < 1920) #define NOTHROW __declspec(nothrow) #define NOEXCEPT #else @@ -10,6 +9,11 @@ #define NOEXCEPT noexcept #endif +#if defined(__clang__) +#pragma clang diagnostic push +#pragma clang diagnostic ignored "-Wc++98-compat" +#endif + UTEST(cpp11, create) { struct hashmap_s hashmap; ASSERT_EQ(0, hashmap_create(1, &hashmap)); @@ -163,3 +167,31 @@ UTEST(cpp11, num_entries) { ASSERT_EQ(0u, hashmap_num_entries(&hashmap)); hashmap_destroy(&hashmap); } + +UTEST(cpp11, hash_conflict) { + struct hashmap_s hashmap; + + int x = 42; + int y = 13; + int z = -53; + + ASSERT_EQ(0, hashmap_create(4, &hashmap)); + + // These all hash to the same value. + ASSERT_EQ(0, hashmap_put(&hashmap, "000", 3, &x)); + ASSERT_EQ(0, hashmap_put(&hashmap, "002", 3, &y)); + ASSERT_EQ(0, hashmap_put(&hashmap, "007", 3, &z)); + ASSERT_EQ(3u, hashmap_num_entries(&hashmap)); + + // Now we remove the middle value. + ASSERT_EQ(0, hashmap_remove(&hashmap, "002", 3)); + ASSERT_EQ(2u, hashmap_num_entries(&hashmap)); + + // And now attempt to insert the last value again. There was a bug where this + // would insert a new entry incorrectly instead of resolving to the previous + // entry. + ASSERT_EQ(0, hashmap_put(&hashmap, "007", 3, &z)); + ASSERT_EQ(2u, hashmap_num_entries(&hashmap)); + + hashmap_destroy(&hashmap); +} diff --git a/test/test_sse42.c b/test/test_sse42.c index b7cef38..8a76a6b 100644 --- a/test/test_sse42.c +++ b/test/test_sse42.c @@ -160,4 +160,32 @@ UTEST(c_sse42, crc32_matches) { } } +UTEST(c_sse42, hash_conflict) { + struct hashmap_s hashmap; + + int x = 42; + int y = 13; + int z = -53; + + ASSERT_EQ(0, hashmap_create(4, &hashmap)); + + // These all hash to the same value. + ASSERT_EQ(0, hashmap_put(&hashmap, "000", 3, &x)); + ASSERT_EQ(0, hashmap_put(&hashmap, "002", 3, &y)); + ASSERT_EQ(0, hashmap_put(&hashmap, "007", 3, &z)); + ASSERT_EQ(3u, hashmap_num_entries(&hashmap)); + + // Now we remove the middle value. + ASSERT_EQ(0, hashmap_remove(&hashmap, "002", 3)); + ASSERT_EQ(2u, hashmap_num_entries(&hashmap)); + + // And now attempt to insert the last value again. There was a bug where this + // would insert a new entry incorrectly instead of resolving to the previous + // entry. + ASSERT_EQ(0, hashmap_put(&hashmap, "007", 3, &z)); + ASSERT_EQ(2u, hashmap_num_entries(&hashmap)); + + hashmap_destroy(&hashmap); +} + #endif diff --git a/test/utest.h b/test/utest.h index 26bf890..5ea323b 100644 --- a/test/utest.h +++ b/test/utest.h @@ -36,20 +36,17 @@ #ifdef _MSC_VER /* Disable warning about not inlining 'inline' functions. - TODO: We'll fix this later by not using fprintf within our macros, and - instead use snprintf to a realloc'ed buffer. */ #pragma warning(disable : 4710) /* Disable warning about inlining functions that are not marked 'inline'. - TODO: add a UTEST_NOINLINE onto the macro generated functions to fix this. */ #pragma warning(disable : 4711) #pragma warning(push, 1) #endif -#if defined(_MSC_VER) +#if defined(_MSC_VER) && (_MSC_VER < 1920) typedef __int64 utest_int64_t; typedef unsigned __int64 utest_uint64_t; #else @@ -68,20 +65,29 @@ typedef uint64_t utest_uint64_t; #pragma warning(pop) #endif -#if defined(_MSC_VER) -#if defined(_M_IX86) -#define _X86_ -#endif - -#if defined(_M_AMD64) -#define _AMD64_ +#if defined(__cplusplus) +#define UTEST_C_FUNC extern "C" +#else +#define UTEST_C_FUNC #endif -#pragma warning(push, 1) -#include -#include -#pragma warning(pop) - +#if defined(_MSC_VER) +typedef union { + struct { + unsigned long LowPart; + long HighPart; + } DUMMYSTRUCTNAME; + struct { + unsigned long LowPart; + long HighPart; + } u; + utest_int64_t QuadPart; +} utest_large_integer; + +UTEST_C_FUNC __declspec(dllimport) int __stdcall QueryPerformanceCounter( + utest_large_integer *); +UTEST_C_FUNC __declspec(dllimport) int __stdcall QueryPerformanceFrequency( + utest_large_integer *); #elif defined(__linux__) /* @@ -111,29 +117,44 @@ typedef uint64_t utest_uint64_t; #include #endif -#if defined(_MSC_VER) +#if defined(_MSC_VER) && (_MSC_VER < 1920) #define UTEST_PRId64 "I64d" #define UTEST_PRIu64 "I64u" -#define UTEST_INLINE __forceinline - -#if defined(__cplusplus) -#define UTEST_C_FUNC extern "C" #else -#define UTEST_C_FUNC +#include + +#define UTEST_PRId64 PRId64 +#define UTEST_PRIu64 PRIu64 #endif +#if defined(_MSC_VER) +#define UTEST_INLINE __forceinline + #if defined(_WIN64) #define UTEST_SYMBOL_PREFIX #else #define UTEST_SYMBOL_PREFIX "_" #endif +#if defined(__clang__) +#define UTEST_INITIALIZER_BEGIN_DISABLE_WARNINGS \ + _Pragma("clang diagnostic push") \ + _Pragma("clang diagnostic ignored \"-Wmissing-variable-declarations\"") + +#define UTEST_INITIALIZER_END_DISABLE_WARNINGS _Pragma("clang diagnostic pop") +#else +#define UTEST_INITIALIZER_BEGIN_DISABLE_WARNINGS +#define UTEST_INITIALIZER_END_DISABLE_WARNINGS +#endif + #pragma section(".CRT$XCU", read) #define UTEST_INITIALIZER(f) \ static void __cdecl f(void); \ - __pragma(comment(linker, "/include:" UTEST_SYMBOL_PREFIX #f "_")); \ - UTEST_C_FUNC __declspec(allocate(".CRT$XCU")) void(__cdecl * f##_)(void) = \ - f; \ + UTEST_INITIALIZER_BEGIN_DISABLE_WARNINGS \ + __pragma(comment(linker, "/include:" UTEST_SYMBOL_PREFIX #f "_")) \ + UTEST_C_FUNC __declspec(allocate(".CRT$XCU")) void(__cdecl * \ + f##_)(void) = f; \ + UTEST_INITIALIZER_END_DISABLE_WARNINGS \ static void __cdecl f(void) #else #if defined(__linux__) @@ -153,10 +174,6 @@ typedef uint64_t utest_uint64_t; #endif #endif -#include - -#define UTEST_PRId64 PRId64 -#define UTEST_PRIu64 PRIu64 #define UTEST_INLINE inline #define UTEST_INITIALIZER(f) \ @@ -194,8 +211,8 @@ typedef uint64_t utest_uint64_t; static UTEST_INLINE utest_int64_t utest_ns(void) { #ifdef _MSC_VER - LARGE_INTEGER counter; - LARGE_INTEGER frequency; + utest_large_integer counter; + utest_large_integer frequency; QueryPerformanceCounter(&counter); QueryPerformanceFrequency(&frequency); return UTEST_CAST(utest_int64_t, @@ -263,19 +280,21 @@ UTEST_EXTERN struct utest_state_s utest_state; #pragma clang diagnostic pop #endif -#ifdef _MSC_VER -#define UTEST_SNPRINTF(BUFFER, N, ...) _snprintf_s(BUFFER, N, N, __VA_ARGS__) -#else #ifdef __clang__ #pragma clang diagnostic push #pragma clang diagnostic ignored "-Wvariadic-macros" #pragma clang diagnostic ignored "-Wc++98-compat-pedantic" #endif + +#ifdef _MSC_VER +#define UTEST_SNPRINTF(BUFFER, N, ...) _snprintf_s(BUFFER, N, N, __VA_ARGS__) +#else #define UTEST_SNPRINTF(...) snprintf(__VA_ARGS__) +#endif + #ifdef __clang__ #pragma clang diagnostic pop #endif -#endif #if defined(__cplusplus) /* if we are using c++ we can use overloaded methods (its in the language) */ @@ -871,36 +890,21 @@ UTEST_WEAK int utest_should_filter_test(const char *filter, return 0; } -static UTEST_INLINE int utest_strncmp(const char *a, const char *b, size_t n) { - /* strncmp breaks on Wall / Werror on gcc/clang, so we avoid using it */ - unsigned i; - - for (i = 0; i < n; i++) { - if (a[i] < b[i]) { - return -1; - } else if (a[i] > b[i]) { - return 1; - } - } - - return 0; -} - static UTEST_INLINE FILE *utest_fopen(const char *filename, const char *mode) { #ifdef _MSC_VER FILE *file; if (0 == fopen_s(&file, filename, mode)) { return file; } else { - return 0; + return UTEST_NULL; } #else return fopen(filename, mode); #endif } -UTEST_WEAK int utest_main(int argc, const char *const argv[]); -UTEST_WEAK int utest_main(int argc, const char *const argv[]) { +static UTEST_INLINE int utest_main(int argc, const char *const argv[]); +int utest_main(int argc, const char *const argv[]) { utest_uint64_t failed = 0; size_t index = 0; size_t *failed_testcases = UTEST_NULL; @@ -926,7 +930,7 @@ UTEST_WEAK int utest_main(int argc, const char *const argv[]) { const char filter_str[] = "--filter="; const char output_str[] = "--output="; - if (0 == utest_strncmp(argv[index], help_str, strlen(help_str))) { + if (0 == UTEST_STRNCMP(argv[index], help_str, strlen(help_str))) { printf("utest.h - the single file unit testing solution for C/C++!\n" "Command line Options:\n" " --help Show this message and exit.\n" @@ -938,13 +942,13 @@ UTEST_WEAK int utest_main(int argc, const char *const argv[]) { "specified in .\n"); goto cleanup; } else if (0 == - utest_strncmp(argv[index], filter_str, strlen(filter_str))) { + UTEST_STRNCMP(argv[index], filter_str, strlen(filter_str))) { /* user wants to filter what test cases run! */ filter = argv[index] + strlen(filter_str); } else if (0 == - utest_strncmp(argv[index], output_str, strlen(output_str))) { + UTEST_STRNCMP(argv[index], output_str, strlen(output_str))) { utest_state.output = utest_fopen(argv[index] + strlen(output_str), "w+"); - } else if (0 == utest_strncmp(argv[index], list_str, strlen(list_str))) { + } else if (0 == UTEST_STRNCMP(argv[index], list_str, strlen(list_str))) { for (index = 0; index < utest_state.tests_length; index++) { UTEST_PRINTF("%s\n", utest_state.tests[index].name); }