From 72b14497e81d9633a917fdfa0f3d8aa8e1db40b1 Mon Sep 17 00:00:00 2001 From: Friedrich Tschirpke Date: Tue, 6 Aug 2024 15:56:25 +0200 Subject: [PATCH] Fuzzy search (#103) --- Compiler.cmake | 2 +- src/core/content.cpp | 2 - src/core/content_library.hpp | 2 - src/core/models/character/decision.cpp | 1 + src/core/referencing_content_library.hpp | 29 -- .../searching/fuzzy_search/CMakeLists.txt | 1 + .../fuzzy_search/fuzzy_content_search.cpp | 221 +++++--------- .../fuzzy_search/fuzzy_content_search.hpp | 69 ++--- .../fuzzy_search/fuzzy_search_path.hpp | 61 ---- .../fuzzy_search/fuzzy_string_search.cpp | 287 ++++++++++++++++++ .../fuzzy_search/fuzzy_string_search.hpp | 14 + src/core/searching/fuzzy_search/trie.hpp | 68 ----- src/core/searching/fuzzy_search/trie_node.hpp | 113 ------- src/core/searching/search_result.hpp | 25 ++ src/core/session.cpp | 85 ++---- src/core/session.hpp | 10 +- src/core/storage_content_library.hpp | 29 -- src/gui/windows/fuzzy_search_window.cpp | 95 ++++-- src/gui/windows/fuzzy_search_window.hpp | 3 +- src/runtime_measurement/measurer.cpp | 8 +- src/runtime_measurement/measurer.hpp | 3 +- .../validation/validation_data_mock.hpp | 1 - 22 files changed, 548 insertions(+), 581 deletions(-) delete mode 100644 src/core/searching/fuzzy_search/fuzzy_search_path.hpp create mode 100644 src/core/searching/fuzzy_search/fuzzy_string_search.cpp create mode 100644 src/core/searching/fuzzy_search/fuzzy_string_search.hpp delete mode 100644 src/core/searching/fuzzy_search/trie.hpp delete mode 100644 src/core/searching/fuzzy_search/trie_node.hpp create mode 100644 src/core/searching/search_result.hpp diff --git a/Compiler.cmake b/Compiler.cmake index 87e3a190..b2b4c666 100644 --- a/Compiler.cmake +++ b/Compiler.cmake @@ -14,7 +14,7 @@ function(set_compiler_flags TARGET) else() target_compile_options(${TARGET} PRIVATE -pedantic -Wall -Wextra -Wpedantic -Werror -Wdisabled-optimization -Wcast-qual -Wold-style-cast - -Woverloaded-virtual -Wredundant-decls -Wsign-conversion -Wstrict-overflow=5 -Wzero-as-null-pointer-constant + -Woverloaded-virtual -Wredundant-decls -Wsign-conversion -Wstrict-overflow=5 ) if (CMAKE_CXX_COMPILER_ID STREQUAL "GNU") target_compile_options(${TARGET} PRIVATE -Wlogical-op -Wnoexcept -Wstrict-null-sentinel -Wuseless-cast) diff --git a/src/core/content.cpp b/src/core/content.cpp index 5d30ec24..5889fb2b 100644 --- a/src/core/content.cpp +++ b/src/core/content.cpp @@ -2,9 +2,7 @@ #include "content.hpp" -#include #include -#include #include diff --git a/src/core/content_library.hpp b/src/core/content_library.hpp index 10e65794..16231620 100644 --- a/src/core/content_library.hpp +++ b/src/core/content_library.hpp @@ -6,7 +6,6 @@ #include #include -#include #include namespace dnd { @@ -24,7 +23,6 @@ class ContentLibrary { virtual size_t size() const = 0; virtual OptCRef get(size_t index) const = 0; virtual OptCRef get(const std::string& name) const = 0; - virtual const TrieNode* get_fuzzy_search_trie_root() const = 0; }; } // namespace dnd diff --git a/src/core/models/character/decision.cpp b/src/core/models/character/decision.cpp index d0ae085c..c9854c4a 100644 --- a/src/core/models/character/decision.cpp +++ b/src/core/models/character/decision.cpp @@ -2,6 +2,7 @@ #include "decision.hpp" +#include #include #include #include diff --git a/src/core/referencing_content_library.hpp b/src/core/referencing_content_library.hpp index 345272a2..65024832 100644 --- a/src/core/referencing_content_library.hpp +++ b/src/core/referencing_content_library.hpp @@ -7,7 +7,6 @@ #include #include -#include #include #include @@ -33,34 +32,13 @@ class ReferencingContentLibrary : public ContentLibrary { * @return reference to the inserted content piece, or std::nullopt if a content piece with that name already exists */ OptCRef add(const T& content_piece); - const TrieNode* get_fuzzy_search_trie_root() const override; private: - void save_in_trie(const T* content_piece); - std::unordered_map> data; - Trie trie; }; // === IMPLEMENTATION === -template -requires isContentPieceType -void ReferencingContentLibrary::save_in_trie(const T* content_piece) { - std::string lower_name = string_lowercase_copy(content_piece->get_name()); - - trie.insert(lower_name, content_piece); - for (size_t i = 0; i < lower_name.size(); ++i) { - if (lower_name[i] == ' ' || lower_name[i] == '_' || lower_name[i] == '-') { - std::string_view after_sep(lower_name.c_str() + i + 1, lower_name.size() - i - 1); - trie.insert(after_sep, content_piece); - } - if (lower_name[i] == '(') { // do not include parentheses in trie - break; - } - } -} - template requires isContentPieceType bool ReferencingContentLibrary::contains(const std::string& name) const { @@ -111,19 +89,12 @@ OptCRef ReferencingContentLibrary::add(const T& content_piece) { const std::string name = content_piece.get_name(); auto [it, was_inserted] = data.emplace(name, std::cref(content_piece)); if (was_inserted) { - save_in_trie(&it->second.get()); return std::cref(content_piece); } else { return std::nullopt; } } -template -requires isContentPieceType -const TrieNode* ReferencingContentLibrary::get_fuzzy_search_trie_root() const { - return trie.get_root(); -} - } // namespace dnd #endif // REFERENCING_CONTENT_LIBRARY_HPP_ diff --git a/src/core/searching/fuzzy_search/CMakeLists.txt b/src/core/searching/fuzzy_search/CMakeLists.txt index 7c644842..5267a83b 100644 --- a/src/core/searching/fuzzy_search/CMakeLists.txt +++ b/src/core/searching/fuzzy_search/CMakeLists.txt @@ -1,5 +1,6 @@ target_sources(${DND_CORE} PRIVATE fuzzy_content_search.cpp + fuzzy_string_search.cpp ) diff --git a/src/core/searching/fuzzy_search/fuzzy_content_search.cpp b/src/core/searching/fuzzy_search/fuzzy_content_search.cpp index 09689525..17c3dd78 100644 --- a/src/core/searching/fuzzy_search/fuzzy_content_search.cpp +++ b/src/core/searching/fuzzy_search/fuzzy_content_search.cpp @@ -1,154 +1,97 @@ +#include #include #include "fuzzy_content_search.hpp" #include -#include #include -#include +#include #include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include +#include +#include namespace dnd { -FuzzyContentSearch::FuzzyContentSearch(const Content& content) { - query.reserve(40); - character_search_path.push(content.get_characters().get_fuzzy_search_trie_root()); - class_search_path.push(content.get_classes().get_fuzzy_search_trie_root()); - subclass_search_path.push(content.get_subclasses().get_fuzzy_search_trie_root()); - species_search_path.push(content.get_species().get_fuzzy_search_trie_root()); - subspecies_search_path.push(content.get_subspecies().get_fuzzy_search_trie_root()); - item_search_path.push(content.get_items().get_fuzzy_search_trie_root()); - spell_search_path.push(content.get_spells().get_fuzzy_search_trie_root()); - feature_search_path.push(content.get_features().get_fuzzy_search_trie_root()); - choosable_search_path.push(content.get_choosables().get_fuzzy_search_trie_root()); -} - -FuzzyContentSearch::FuzzyContentSearch(const Content& content, const std::string& initial_query) - : FuzzyContentSearch(content) { - for (char c : initial_query) { - add_character_to_query(c); - } -} - -void FuzzyContentSearch::set_search_query(const std::string& new_query) { - if (new_query.empty()) { - clear_query(); - return; - } - while (query.size() > new_query.size()) { - remove_character_from_query(); - } - assert(query.size() <= new_query.size()); - - size_t common_length = 0; - while (common_length < query.size() && query[common_length] == new_query[common_length]) { - ++common_length; - } - - while (query.size() > common_length) { - remove_character_from_query(); - } - assert(query.size() == common_length); - - for (size_t i = common_length; i < new_query.size(); ++i) { - add_character_to_query(new_query[i]); - } - - assert(query.size() == new_query.size()); - for (size_t i = 0; i < query.size(); ++i) { - assert(query[i] == char_to_lowercase(new_query[i])); - } -} - -void FuzzyContentSearch::clear_query() { - while (!query.empty()) { - remove_character_from_query(); - } -} - -void FuzzyContentSearch::add_character_to_query(char c) { - c = char_to_lowercase(c); - query.push_back(c); - - character_search_path.push_top_child(c); - class_search_path.push_top_child(c); - subclass_search_path.push_top_child(c); - species_search_path.push_top_child(c); - subspecies_search_path.push_top_child(c); - item_search_path.push_top_child(c); - spell_search_path.push_top_child(c); - feature_search_path.push_top_child(c); - choosable_search_path.push_top_child(c); -} - -void FuzzyContentSearch::remove_character_from_query() { - if (query.empty()) { - return; - } - query.pop_back(); - - character_search_path.pop(); - assert(character_search_path.size() >= 1); - class_search_path.pop(); - assert(class_search_path.size() >= 1); - subclass_search_path.pop(); - assert(subclass_search_path.size() >= 1); - species_search_path.pop(); - assert(species_search_path.size() >= 1); - subspecies_search_path.pop(); - assert(subspecies_search_path.size() >= 1); - item_search_path.pop(); - assert(item_search_path.size() >= 1); - spell_search_path.pop(); - assert(spell_search_path.size() >= 1); - feature_search_path.pop(); - assert(feature_search_path.size() >= 1); - choosable_search_path.pop(); - assert(choosable_search_path.size() >= 1); -} - -std::unordered_set FuzzyContentSearch::get_results(const std::array& options) const { +std::vector fuzzy_search_content( + const Content& content, const std::string& search_query, const FuzzySearchOptions& options +) { DND_MEASURE_FUNCTION(); - std::unordered_set results; - - if (options[0]) { - character_search_path.insert_top_successors_into(results); - } - if (options[1]) { - species_search_path.insert_top_successors_into(results); - } - if (options[2]) { - class_search_path.insert_top_successors_into(results); - } - if (options[3]) { - subspecies_search_path.insert_top_successors_into(results); - } - if (options[4]) { - subclass_search_path.insert_top_successors_into(results); - } - if (options[5]) { - item_search_path.insert_top_successors_into(results); - } - if (options[6]) { - spell_search_path.insert_top_successors_into(results); - } - if (options[7]) { - feature_search_path.insert_top_successors_into(results); - } - if (options[8]) { - choosable_search_path.insert_top_successors_into(results); + std::vector results; + + int64_t min_match_score = 0; + + if (options.search_characters) { + for (const auto& [character_name, character] : content.get_characters().get_all()) { + int64_t match_score = fuzzy_match_string(search_query, character_name); + if (match_score > min_match_score) { + results.emplace_back(&character, match_score); + } + } + } + if (options.search_species) { + for (const auto& [species_name, species] : content.get_species().get_all()) { + int64_t match_score = fuzzy_match_string(search_query, species_name); + if (match_score > min_match_score) { + results.emplace_back(&species, match_score); + } + } + } + if (options.search_classes) { + for (const auto& [class_name, cls] : content.get_classes().get_all()) { + int64_t match_score = fuzzy_match_string(search_query, class_name); + if (match_score > min_match_score) { + results.emplace_back(&cls, match_score); + } + } + } + if (options.search_subspecies) { + for (const auto& [subspecies_name, subspecies] : content.get_subspecies().get_all()) { + int64_t match_score = fuzzy_match_string(search_query, subspecies_name); + if (match_score > min_match_score) { + results.emplace_back(&subspecies, match_score); + } + } + } + if (options.search_subclasses) { + for (const auto& [subclass_name, subclass] : content.get_subclasses().get_all()) { + int64_t match_score = fuzzy_match_string(search_query, subclass_name); + if (match_score > min_match_score) { + results.emplace_back(&subclass, match_score); + } + } + } + if (options.search_items) { + for (const auto& [item_name, item] : content.get_items().get_all()) { + int64_t match_score = fuzzy_match_string(search_query, item_name); + if (match_score > min_match_score) { + results.emplace_back(&item, match_score); + } + } + } + if (options.search_spells) { + for (const auto& [spell_name, spell] : content.get_spells().get_all()) { + int64_t match_score = fuzzy_match_string(search_query, spell_name); + if (match_score > min_match_score) { + results.emplace_back(&spell, match_score); + } + } + } + if (options.search_features) { + for (const auto& [feature_name, feature] : content.get_features().get_all()) { + int64_t match_score = fuzzy_match_string(search_query, feature_name); + if (match_score > min_match_score) { + results.emplace_back(&feature.get(), match_score); + } + } + } + if (options.search_choosables) { + for (const auto& [choosable_name, choosable] : content.get_choosables().get_all()) { + int64_t match_score = fuzzy_match_string(search_query, choosable_name); + if (match_score > min_match_score) { + results.emplace_back(&choosable, match_score); + } + } } return results; diff --git a/src/core/searching/fuzzy_search/fuzzy_content_search.hpp b/src/core/searching/fuzzy_search/fuzzy_content_search.hpp index 230cf009..48fe9690 100644 --- a/src/core/searching/fuzzy_search/fuzzy_content_search.hpp +++ b/src/core/searching/fuzzy_search/fuzzy_content_search.hpp @@ -3,53 +3,44 @@ #include -#include +#include #include -#include #include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include +#include namespace dnd { -/** - * @brief A class representing a content search query - */ -class FuzzyContentSearch { -public: - FuzzyContentSearch(const Content& content); - FuzzyContentSearch(const Content& content, const std::string& initial_query); - - void set_search_query(const std::string& new_query); - void clear_query(); - void add_character_to_query(char c); - void remove_character_from_query(); - - std::unordered_set get_results(const std::array& options) const; -private: - std::vector query; - FuzzySearchPath character_search_path; - FuzzySearchPath class_search_path; - FuzzySearchPath subclass_search_path; - FuzzySearchPath species_search_path; - FuzzySearchPath subspecies_search_path; - FuzzySearchPath item_search_path; - FuzzySearchPath spell_search_path; - FuzzySearchPath feature_search_path; - FuzzySearchPath choosable_search_path; +struct FuzzySearchOptions { + bool search_characters; + bool search_classes; + bool search_subclasses; + bool search_species; + bool search_subspecies; + bool search_items; + bool search_spells; + bool search_features; + bool search_choosables; + + std::strong_ordering operator<=>(const FuzzySearchOptions&) const = default; + + void set_all(bool value) { + search_characters = value; + search_classes = value; + search_subclasses = value; + search_species = value; + search_subspecies = value; + search_items = value; + search_spells = value; + search_features = value; + search_choosables = value; + } }; +std::vector fuzzy_search_content( + const Content& content, const std::string& search_query, const FuzzySearchOptions& options +); + } // namespace dnd diff --git a/src/core/searching/fuzzy_search/fuzzy_search_path.hpp b/src/core/searching/fuzzy_search/fuzzy_search_path.hpp deleted file mode 100644 index 00a81845..00000000 --- a/src/core/searching/fuzzy_search/fuzzy_search_path.hpp +++ /dev/null @@ -1,61 +0,0 @@ -#ifndef FUZZY_SEARCH_PATH_HPP_ -#define FUZZY_SEARCH_PATH_HPP_ - -#include - -#include -#include - -#include - -namespace dnd { - -template -class FuzzySearchPath : public std::stack*> { -public: - /** - * @brief Push the child of the top node onto the stack if it exists (otherwise push nullptr) - * @param c the character to get the child for - */ - void push_top_child(char c); - /** - * @brief Insert the successors of the top node into a vector if such successors exist - * @tparam S - * @param vector the vector to insert the successors into - * @return "true" if successors were inserted, "false" otherwise (nothing inserted) - */ - template - requires std::derived_from - bool insert_top_successors_into(std::unordered_set& set) const; -}; - - -// === IMPLEMENTATION === - -template -void FuzzySearchPath::push_top_child(char c) { - if (this->top() == nullptr) { - this->push(nullptr); - } else { - this->push(this->top()->get_child(c)); - } -} - -template -template -requires std::derived_from -bool FuzzySearchPath::insert_top_successors_into(std::unordered_set& set) const { - if (this->top() == nullptr) { - return false; - } - std::vector successors = this->top()->successors(); - if (successors.empty()) { - return false; - } - set.insert(successors.begin(), successors.end()); - return true; -} - -} // namespace dnd - -#endif // FUZZY_SEARCH_PATH_HPP_ diff --git a/src/core/searching/fuzzy_search/fuzzy_string_search.cpp b/src/core/searching/fuzzy_search/fuzzy_string_search.cpp new file mode 100644 index 00000000..162b1452 --- /dev/null +++ b/src/core/searching/fuzzy_search/fuzzy_string_search.cpp @@ -0,0 +1,287 @@ +#include + +#include "fuzzy_string_search.hpp" + +#include +#include +#include +#include +#include + +#include +#include + +struct LocalMaximum { + size_t index; + int64_t value; +}; + +namespace dnd { + +enum class CharType { + LOWER_CHAR, + UPPER_CHAR, + DIGIT, + WHITESPACE, + DELIMITER, + NON_WORD, +}; + +constexpr std::array delimiter_chars = {'-', '(', ')', ':', ','}; + +constexpr int16_t SCORE_MATCH = 16; +constexpr int16_t BONUS_FIRST = 1; +constexpr int16_t BONUS_BOUNDARY = 8; +constexpr int16_t BONUS_BOUNDARY_WHITESPACE = 10; +constexpr int16_t BONUS_BOUNDARY_DELIMITER = 9; +constexpr int16_t BONUS_NON_WORD = 8; +constexpr int16_t BONUS_CAMEL_CASE = 7; +constexpr int16_t BONUS_CONSECUTIVE = 8; +constexpr int16_t SCORE_GAP_START = -3; +constexpr int16_t SCORE_GAP_EXTENSION = -1; + +static CharType char_type(char c) { + if (std::isalpha(c)) { + if (std::isupper(c)) { + return CharType::UPPER_CHAR; + } else { + return CharType::LOWER_CHAR; + } + } else if (std::isdigit(c)) { + return CharType::DIGIT; + } else if (c == '\'') { + return CharType::LOWER_CHAR; // treat apostrophe as normal character (as it is often used in D&D words) + } else if (std::isspace(c)) { + return CharType::WHITESPACE; + } else if (std::find(delimiter_chars.cbegin(), delimiter_chars.cend(), c) != delimiter_chars.cend()) { + return CharType::DELIMITER; + } else { + return CharType::NON_WORD; + } +} + +int16_t bonus_for_types(CharType previous_type, CharType type) { + if (type != CharType::NON_WORD) { + switch (previous_type) { + case CharType::WHITESPACE: + return BONUS_BOUNDARY_WHITESPACE; + case CharType::DELIMITER: + return BONUS_BOUNDARY_DELIMITER; + case CharType::NON_WORD: + return BONUS_NON_WORD; + default: + break; + } + } + + if ((previous_type == CharType::LOWER_CHAR && type == CharType::UPPER_CHAR) + || (previous_type != CharType::DIGIT && type == CharType::DIGIT)) { + return BONUS_CAMEL_CASE; + } + + switch (type) { + case CharType::NON_WORD: + [[fallthrough]]; + case CharType::DELIMITER: + return BONUS_NON_WORD; + case CharType::WHITESPACE: + return BONUS_BOUNDARY_WHITESPACE; + default: + return 0; + } +} + +// fuzzy search implementation heavily inspired by fzf's algorithm +// see https://github.com/junegunn/fzf/blob/db01e7dab65423cd1d14e15f5b15dfaabe760283/src/algo/algo.go#L432 +int64_t fuzzy_match_string(const std::string& search_query, const std::string& string_to_match) { + if (search_query.empty() || string_to_match.empty()) { + return 0; + } + size_t query_len = search_query.size(); + size_t string_len = string_to_match.size(); + if (query_len > string_len) { + return 0; + } + + size_t min_idx = 0; + for (char c : string_to_match) { + if (char_to_lowercase(c) == char_to_lowercase(search_query[0])) { + break; + } + min_idx++; + } + if (min_idx == string_len) { + return 0; + } + + size_t max_idx = string_len; + for (size_t i = string_len - 1; i > 0; --i) { + char c = string_to_match[i]; + if (char_to_lowercase(c) == char_to_lowercase(search_query[query_len - 1])) { + break; + } + max_idx = i; + } + if (min_idx >= max_idx) { + return 0; + } + + size_t range_len = max_idx - min_idx; + + std::vector initial_scores(range_len); + std::vector initial_occupation(range_len); + std::vector bonus_points(range_len); + std::vector first_occurences(query_len); + std::string search_range = string_to_match.substr(min_idx, range_len); + string_lowercase_inplace(search_range); + + int16_t max_score = 0; + size_t max_score_idx = 0; + + char first_query_char = char_to_lowercase(search_query[0]); + char query_char = char_to_lowercase(search_query[0]); + int16_t previous_inital_bonus = 0; + CharType previous_type = CharType::WHITESPACE; + bool in_gap = false; + + size_t query_idx = 0; + size_t last_idx = 0; + + // calculate bonus values and initial scores + for (size_t i = 0; i < range_len; ++i) { + char c = search_range[i]; + CharType type = char_type(c); + if (type == CharType::UPPER_CHAR) { + c = char_to_lowercase(c); + search_range[i] = c; + } + int16_t bonus = bonus_for_types(previous_type, type); + bonus_points[i] = bonus; + previous_type = type; + + if (c == query_char) { + if (query_idx < query_len) { + first_occurences[query_idx] = i; + query_idx++; + query_char = search_query[std::min(query_idx, query_len - 1)]; + query_char = char_to_lowercase(query_char); + } + last_idx = i; + } + + if (c == first_query_char) { + int16_t score = SCORE_MATCH + bonus * 2; + initial_scores[i] = score; + initial_occupation[i] = 1; + if (range_len == 1 && score > max_score) { + max_score = score; + max_score_idx = i; + if (bonus >= BONUS_BOUNDARY) { + break; + } + } + in_gap = false; + } else { + int16_t gap_penalty = in_gap ? SCORE_GAP_EXTENSION : SCORE_GAP_START; + int16_t score = previous_inital_bonus + gap_penalty; + if (score < 0) { + initial_scores[i] = 0; + } else { + initial_scores[0] = score; + } + initial_occupation[i] = 0; + in_gap = true; + } + } + + if (query_idx != query_len) { // did not match whole query + return 0; + } + + if (query_len == 1) { + return static_cast(min_idx + max_score_idx); + } + + size_t very_first_occurence = first_occurences[0]; + size_t match_width = last_idx - very_first_occurence + 1; + std::vector scores(match_width * query_len); + std::vector occupation(match_width * query_len); + + size_t idx = 0; + for (size_t i = very_first_occurence; i <= last_idx; ++i) { + scores[idx] = initial_scores[i]; + occupation[idx] = initial_occupation[i]; + } + + // calculate scores + size_t occurence_count = first_occurences.size() - 1; + for (size_t i = 0; i < occurence_count; ++i) { + query_idx = i + 1; + query_char = char_to_lowercase(search_query[query_idx]); + size_t occurence = first_occurences[query_idx]; + size_t row = query_idx * match_width; + in_gap = false; + + char* search_range_subrange = search_range.data() + occurence; + int16_t* bonus_points_subrange = bonus_points.data() + occurence; + int16_t* occupation_subrange = occupation.data() + row + occurence - very_first_occurence; + int16_t* occupation_diagonal = occupation_subrange - 1 - match_width; + int16_t* scores_subrange = scores.data() + row + occurence - very_first_occurence; + int16_t* scores_diagonal = scores_subrange - 1 - match_width; + int16_t* scores_left = scores_subrange - 1; + scores_left[0] = 0; + + for (size_t j = 0; j <= last_idx - occurence; ++j) { + char c = search_range_subrange[j]; + size_t col = j + occurence; + + int16_t score1 = 0; + int16_t score2 = 0; + int16_t consecutive = 0; + + int16_t gap_penalty = in_gap ? SCORE_GAP_EXTENSION : SCORE_GAP_START; + score2 = scores_left[j] + gap_penalty; + + if (query_char == c) { + score1 = scores_diagonal[j] + SCORE_MATCH; + int16_t bonus = bonus_points_subrange[j]; + consecutive = occupation_diagonal[j] + 1; + if (consecutive > 1) { + int16_t occupation_bonus = bonus_points[col - static_cast(consecutive) + 1]; + if (bonus >= BONUS_BOUNDARY && bonus > occupation_bonus) { + consecutive = 1; + } else { + bonus = std::max(bonus, std::max(BONUS_CONSECUTIVE, occupation_bonus)); + } + } + if (score1 + bonus < score2) { + score1 += bonus_points_subrange[j]; + consecutive = 0; + } else { + score1 += bonus; + } + } + occupation_subrange[j] = consecutive; + + in_gap = score1 < score2; + int16_t score = std::max(static_cast(0), std::max(score1, score2)); + if (query_idx == query_len - 1 && score > max_score) { + max_score = score; + max_score_idx = col; + } + scores_subrange[j] = score; + } + } + + for (size_t i = 0; i < query_len; ++i) { + if (char_to_lowercase(search_query[i]) == char_to_lowercase(string_to_match[i])) { + max_score += BONUS_FIRST; + } else { + break; + } + } + + return max_score; +} + +} // namespace dnd diff --git a/src/core/searching/fuzzy_search/fuzzy_string_search.hpp b/src/core/searching/fuzzy_search/fuzzy_string_search.hpp new file mode 100644 index 00000000..664c2945 --- /dev/null +++ b/src/core/searching/fuzzy_search/fuzzy_string_search.hpp @@ -0,0 +1,14 @@ +#ifndef FUZZY_STRING_SEARCH_HPP_ +#define FUZZY_STRING_SEARCH_HPP_ + +#include + +#include + +namespace dnd { + +int64_t fuzzy_match_string(const std::string& search_query, const std::string& string_to_match); + +} // namespace dnd + +#endif // FUZZY_STRING_SEARCH_HPP diff --git a/src/core/searching/fuzzy_search/trie.hpp b/src/core/searching/fuzzy_search/trie.hpp deleted file mode 100644 index 5fe92638..00000000 --- a/src/core/searching/fuzzy_search/trie.hpp +++ /dev/null @@ -1,68 +0,0 @@ -#ifndef TRIE_HPP_ -#define TRIE_HPP_ - -#include - -#include -#include - -#include - -namespace dnd { - -/** - * @brief A trie data structure that stores strings and associated data - * @tparam T the type of data associated with the end of a word - */ -template -class Trie { -public: - /** - * @brief Constructor for the trie - */ - Trie(); - /** - * @brief Inserts a word into the trie with the given data (as a pointer). - * @param word the string to insert (as string_view) - * @param data the data to associate with the end of the word - */ - void insert(std::string_view word, const T* data); - /** - * @brief Get the root node of the trie - * @return the root node - */ - const TrieNode* get_root() const; -private: - // the root node of the trie - TrieNode root; -}; - - -// === IMPLEMENTATION === - -template -Trie::Trie() : root() {} - -template -void Trie::insert(std::string_view word, const T* data) { - TrieNode* current_node = &root; - for (char c : word) { - TrieNode* child = current_node->get_child(c); - if (child == nullptr) { - child = current_node->create_child(c); - } - assert(child != nullptr); - assert(current_node->get_child(c) == child); - current_node = child; - } - current_node->add_end_word(data); -} - -template -inline const TrieNode* Trie::get_root() const { - return &root; -} - -} // namespace dnd - -#endif // TRIE_HPP_ diff --git a/src/core/searching/fuzzy_search/trie_node.hpp b/src/core/searching/fuzzy_search/trie_node.hpp deleted file mode 100644 index 122c40ac..00000000 --- a/src/core/searching/fuzzy_search/trie_node.hpp +++ /dev/null @@ -1,113 +0,0 @@ -#ifndef TRIE_NODE_HPP_ -#define TRIE_NODE_HPP_ - -#include -#include -#include -#include - -namespace dnd { - -/** - * @brief A node of a trie data structure that stores a character and a map to its children - * @tparam T the type of data associated with the end of a word - */ -template -class TrieNode { -public: - const std::map>& get_children() const; - TrieNode* get_child(char c); - const TrieNode* get_child(char c) const; - /** - * @brief Get the data associated with the end of a word - * @return the data - */ - const std::vector& get_end_words() const; - /** - * @brief Create an empty child node for the given character - * @param c the given character - * @return a pointer to the created node - */ - TrieNode* create_child(char c); - /** - * @brief Add a piece of data associated with the end of a word - * @param end_word the data - */ - void add_end_word(const T* end_word); - /** - * @brief Return a set of all successors of the node (including the node itself) - * @return a set of pointers to all the successor nodes - */ - std::vector successors() const; -private: - // the children of this node in the trie - std::map> children; - // a pointer to the data associated with the end of a word - std::vector end_words; -}; - - -// === IMPLEMENTATION === - -template -const std::map>& TrieNode::get_children() const { - return children; -} - -template -TrieNode* TrieNode::get_child(char c) { - if (children.contains(c)) { - return &children.at(c); - } - return nullptr; -} - -template -const TrieNode* TrieNode::get_child(char c) const { - if (children.contains(c)) { - return &children.at(c); - } - return nullptr; -} - -template -const std::vector& TrieNode::get_end_words() const { - return end_words; -} - -template -TrieNode* TrieNode::create_child(char c) { - children[c] = TrieNode(); - return &children.at(c); -} - -template -void TrieNode::add_end_word(const T* end_word) { - end_words.push_back(end_word); -} - -template -std::vector TrieNode::successors() const { - std::vector successors; - std::stack*> node_stack; - node_stack.push(this); - - while (!node_stack.empty()) { - const TrieNode* current_node = node_stack.top(); - assert(current_node != nullptr); - node_stack.pop(); - - const std::vector& current_end_words = current_node->get_end_words(); - if (!current_end_words.empty()) { - successors.insert(successors.end(), current_end_words.cbegin(), current_end_words.cend()); - } - for (auto it = current_node->get_children().crbegin(); it != current_node->get_children().crend(); ++it) { - node_stack.push(&it->second); - } - } - return successors; -} - -} // namespace dnd - -#endif // TRIE_NODE_HPP_ diff --git a/src/core/searching/search_result.hpp b/src/core/searching/search_result.hpp new file mode 100644 index 00000000..9118ab72 --- /dev/null +++ b/src/core/searching/search_result.hpp @@ -0,0 +1,25 @@ +#ifndef SEARCH_RESULT_HPP_ +#define SEARCH_RESULT_HPP_ + +#include + +#include + +namespace dnd { + +class SearchResult { +public: + SearchResult() noexcept = default; + SearchResult(const ContentPiece* content_piece_ptr, int64_t significance) noexcept; + + const ContentPiece* content_piece_ptr; + int64_t significance; +}; + + +inline SearchResult::SearchResult(const ContentPiece* content_piece_ptr, int64_t significance) noexcept + : content_piece_ptr(content_piece_ptr), significance(significance) {} + +} // namespace dnd + +#endif // SEARCH_RESULT_HPP_ diff --git a/src/core/session.cpp b/src/core/session.cpp index cb3e26ae..1f2552f6 100644 --- a/src/core/session.cpp +++ b/src/core/session.cpp @@ -6,9 +6,8 @@ #include #include #include -#include #include -#include +#include #include #include @@ -21,6 +20,7 @@ #include #include #include +#include #include #include #include @@ -36,7 +36,7 @@ namespace dnd { Session::Session(const char* last_session_filename) : last_session_filename(last_session_filename), status(SessionStatus::CONTENT_DIR_SELECTION), content_directory(), campaign_name(), parsing_future(), errors(), content(), last_session_open_tabs(), open_content_pieces(), - selected_content_piece(), fuzzy_search(), fuzzy_search_results(max_search_results), fuzzy_search_result_count(0), + selected_content_piece(), fuzzy_search_results(max_search_results), fuzzy_search_result_strings(max_search_results), advanced_search(content), unknown_error_messages() {} Session::~Session() { save_session_values(); } @@ -65,9 +65,9 @@ const ContentPiece* Session::get_selected_content_piece() { return rv; } -size_t Session::get_fuzzy_search_result_count() const { return fuzzy_search_result_count; } +size_t Session::get_fuzzy_search_result_count() const { return fuzzy_search_results.size(); } -bool Session::too_many_fuzzy_search_results() const { return fuzzy_search_result_count > max_search_results; } +bool Session::too_many_fuzzy_search_results() const { return fuzzy_search_results.size() > max_search_results; } std::vector Session::get_possible_campaign_names() const { if (content_directory.empty()) { @@ -86,12 +86,12 @@ std::vector Session::get_possible_campaign_names() const { std::vector Session::get_fuzzy_search_result_strings() const { DND_MEASURE_FUNCTION(); ListContentVisitor list_content_visitor; - list_content_visitor.reserve(fuzzy_search_result_count); - if (fuzzy_search_result_count > max_search_results) { + list_content_visitor.reserve(fuzzy_search_results.size()); + if (fuzzy_search_results.size() > max_search_results) { return {}; } - for (size_t i = 0; i < fuzzy_search_result_count; ++i) { - fuzzy_search_results[i]->accept_visitor(list_content_visitor); + for (size_t i = 0; i < fuzzy_search_results.size(); ++i) { + fuzzy_search_results[i].content_piece_ptr->accept_visitor(list_content_visitor); } return list_content_visitor.get_list(); } @@ -222,65 +222,29 @@ Errors Session::set_content_directory(const std::filesystem::path& new_content_d return content_dir_errors; } -// A comparator for content pieces that sorts them by name and prioritizes those whose name starts with a -// givencharacter -class ContentPieceComparator { -public: - explicit ContentPieceComparator(const std::string& search_query) - : query_length(search_query.size()), lower_query(string_lowercase_copy(search_query)), - upper_query(string_uppercase_copy(search_query)) {} - - bool starts_with_search_query(const std::string& name) const { - size_t i = 0; - while (i < query_length) { - if (name[i] != lower_query[i] && name[i] != upper_query[i]) { - return false; - } - if (++i >= name.size()) { - return false; - } - } - return true; - } - - bool operator()(const ContentPiece* a, const ContentPiece* b) const { - const std::string& a_name = a->get_name(); - const std::string& b_name = b->get_name(); - bool prioritize_a = starts_with_search_query(a_name); - bool prioritize_b = starts_with_search_query(b_name); - if (prioritize_a && !prioritize_b) { - return true; - } - if (!prioritize_a && prioritize_b) { - return false; - } - return a->get_name() < b->get_name(); - } -private: - size_t query_length; - std::string lower_query; - std::string upper_query; -}; - -void Session::set_fuzzy_search(const std::string& search_query, const std::array& search_options) { +void Session::set_fuzzy_search(const std::string& search_query, const FuzzySearchOptions& search_options) { DND_MEASURE_FUNCTION(); - fuzzy_search->set_search_query(search_query); - std::unordered_set set_search_results = fuzzy_search->get_results(search_options); - fuzzy_search_result_count = set_search_results.size(); - if (fuzzy_search_result_count > max_search_results) { + if (search_query.size() < 3) { + fuzzy_search_results.clear(); return; } - ContentPieceComparator comparator(search_query); - fuzzy_search_results.insert(fuzzy_search_results.begin(), set_search_results.begin(), set_search_results.end()); - std::span results_span(fuzzy_search_results.begin(), fuzzy_search_result_count); - std::sort(results_span.begin(), results_span.end(), comparator); + fuzzy_search_results = fuzzy_search_content(content, search_query, search_options); + std::sort( + fuzzy_search_results.begin(), fuzzy_search_results.end(), + [](const SearchResult& a, const SearchResult& b) { + if (a.significance == b.significance) { + return a.content_piece_ptr->get_name() < b.content_piece_ptr->get_name(); + } + return a.significance > b.significance; + } + ); } void Session::open_fuzzy_search_result(size_t index) { - if (index >= fuzzy_search_result_count) { + if (index >= fuzzy_search_results.size()) { return; } - open_content_piece(fuzzy_search_results[index]); + open_content_piece(fuzzy_search_results[index].content_piece_ptr); } void Session::open_advanced_search_result(size_t index) { @@ -334,7 +298,6 @@ void Session::parse_content_and_initialize() { ParsingResult parsing_result = parse_content(content_directory, campaign_name); content = std::move(parsing_result.content); errors = std::move(parsing_result.errors); - fuzzy_search = std::make_unique(content); for (const Error& error : errors.get_errors()) { switch (error.index()) { case 0: { diff --git a/src/core/session.hpp b/src/core/session.hpp index 6fb861f3..402e9464 100644 --- a/src/core/session.hpp +++ b/src/core/session.hpp @@ -3,11 +3,9 @@ #include -#include #include #include #include -#include #include #include #include @@ -58,7 +56,7 @@ class Session { size_t get_fuzzy_search_result_count() const; bool too_many_fuzzy_search_results() const; std::vector get_fuzzy_search_result_strings() const; - void set_fuzzy_search(const std::string& search_query, const std::array& search_options); + void set_fuzzy_search(const std::string& search_query, const FuzzySearchOptions& search_options); void open_fuzzy_search_result(size_t index); std::vector get_advanced_search_result_strings() const; @@ -75,7 +73,7 @@ class Session { void open_last_session(); void open_content_piece(const ContentPiece* content_piece); - static constexpr int max_search_results = 500; + static constexpr int max_search_results = 1000; const char* const last_session_filename; @@ -93,9 +91,7 @@ class Session { std::deque open_content_pieces; const ContentPiece* selected_content_piece; - std::unique_ptr fuzzy_search; - std::vector fuzzy_search_results; - size_t fuzzy_search_result_count; + std::vector fuzzy_search_results; std::vector fuzzy_search_result_strings; AdvancedContentSearch advanced_search; diff --git a/src/core/storage_content_library.hpp b/src/core/storage_content_library.hpp index 98dd757b..a5917872 100644 --- a/src/core/storage_content_library.hpp +++ b/src/core/storage_content_library.hpp @@ -12,7 +12,6 @@ #include #include #include -#include #include #include #include @@ -43,35 +42,14 @@ class StorageContentLibrary : public ContentLibrary { void add_draft(std::pair&& draft); void add_draft(typename T::Data&& draft_data, Errors&& draft_errors); OptCRef add_result(CreateResult&& content_piece_result); - const TrieNode* get_fuzzy_search_trie_root() const override; private: - void save_in_trie(const T& content_piece); - std::unordered_map data; - Trie trie; std::vector> drafts; }; // === IMPLEMENTATION === -template -requires isContentPieceType -void StorageContentLibrary::save_in_trie(const T& content_piece) { - std::string lower_name = string_lowercase_copy(content_piece.get_name()); - - trie.insert(lower_name, &content_piece); - for (size_t i = 0; i < lower_name.size(); ++i) { - if (lower_name[i] == ' ' || lower_name[i] == '_' || lower_name[i] == '-') { - std::string_view after_sep(lower_name.c_str() + i + 1, lower_name.size() - i - 1); - trie.insert(after_sep, &content_piece); - } - if (lower_name[i] == '(') { // do not include parentheses in trie - break; - } - } -} - template requires isContentPieceType bool StorageContentLibrary::contains(const std::string& name) const { @@ -128,7 +106,6 @@ OptCRef StorageContentLibrary::add(T&& content_piece) { const std::string name = content_piece.get_name(); auto [it, was_inserted] = data.emplace(name, std::move(content_piece)); if (was_inserted) { - save_in_trie(it->second); return std::cref(it->second); } else { return std::nullopt; @@ -158,12 +135,6 @@ void StorageContentLibrary::add_draft(typename T::Data&& draft_data, Errors&& drafts.emplace_back(std::move(draft_data), std::move(draft_errors)); } -template -requires isContentPieceType -const TrieNode* StorageContentLibrary::get_fuzzy_search_trie_root() const { - return trie.get_root(); -} - } // namespace dnd #endif // STORAGE_CONTENT_LIBRARY_HPP_ diff --git a/src/gui/windows/fuzzy_search_window.cpp b/src/gui/windows/fuzzy_search_window.cpp index a68aff1f..5daac953 100644 --- a/src/gui/windows/fuzzy_search_window.cpp +++ b/src/gui/windows/fuzzy_search_window.cpp @@ -2,7 +2,6 @@ #include "fuzzy_search_window.hpp" -#include #include #include @@ -14,17 +13,7 @@ namespace dnd { FuzzySearchWindow::FuzzySearchWindow(Session& session) : session(session), search_query(), search_options() { - search_options.fill(true); -} - -static void fuzzy_search_option_line(size_t index, const char* const name, std::array& options) { - std::string only_button_label = fmt::format("Only##{}", index); - if (ImGui::Button(only_button_label.c_str())) { - options.fill(false); - options[index] = true; - } - ImGui::SameLine(); - ImGui::Checkbox(name, &options[index]); + search_options.set_all(true); } void FuzzySearchWindow::render() { @@ -35,20 +24,75 @@ void FuzzySearchWindow::render() { if (ImGui::InputText("Search", &search_query, ImGuiInputTextFlags_EscapeClearsAll, nullptr, nullptr)) { search_changed = true; } - const std::array old_fuzzy_search_options = search_options; + FuzzySearchOptions old_fuzzy_search_options = search_options; if (ImGui::TreeNode("Search options")) { if (ImGui::Button("All", ImVec2(200, 0))) { - search_options.fill(true); + search_options.set_all(true); + } + + if (ImGui::Button("Only##Characters")) { + search_options.set_all(false); + search_options.search_characters = true; + } + ImGui::SameLine(); + ImGui::Checkbox("Characters", &search_options.search_characters); + + if (ImGui::Button("Only##Species")) { + search_options.set_all(false); + search_options.search_species = true; + } + ImGui::SameLine(); + ImGui::Checkbox("Species", &search_options.search_species); + + if (ImGui::Button("Only##Subspecies")) { + search_options.set_all(false); + search_options.search_subspecies = true; + } + ImGui::SameLine(); + ImGui::Checkbox("Subspecies", &search_options.search_subspecies); + + if (ImGui::Button("Only##Classes")) { + search_options.set_all(false); + search_options.search_classes = true; + } + ImGui::SameLine(); + ImGui::Checkbox("Classes", &search_options.search_classes); + + if (ImGui::Button("Only##Subclasses")) { + search_options.set_all(false); + search_options.search_subclasses = true; } - fuzzy_search_option_line(0, "Characters", search_options); - fuzzy_search_option_line(1, "Species", search_options); - fuzzy_search_option_line(3, "Subspecies", search_options); - fuzzy_search_option_line(2, "Classes", search_options); - fuzzy_search_option_line(4, "Subclasses", search_options); - fuzzy_search_option_line(5, "Items", search_options); - fuzzy_search_option_line(6, "Spells", search_options); - fuzzy_search_option_line(7, "Features", search_options); - fuzzy_search_option_line(8, "Choosables", search_options); + ImGui::SameLine(); + ImGui::Checkbox("Subclasses", &search_options.search_subclasses); + + if (ImGui::Button("Only##Items")) { + search_options.set_all(false); + search_options.search_items = true; + } + ImGui::SameLine(); + ImGui::Checkbox("Items", &search_options.search_items); + + if (ImGui::Button("Only##Spells")) { + search_options.set_all(false); + search_options.search_spells = true; + } + ImGui::SameLine(); + ImGui::Checkbox("Spells", &search_options.search_spells); + + if (ImGui::Button("Only##Features")) { + search_options.set_all(false); + search_options.search_features = true; + } + ImGui::SameLine(); + ImGui::Checkbox("Features", &search_options.search_features); + + if (ImGui::Button("Only##Choosables")) { + search_options.set_all(false); + search_options.search_choosables = true; + } + ImGui::SameLine(); + ImGui::Checkbox("Choosables", &search_options.search_choosables); + ImGui::TreePop(); } if (old_fuzzy_search_options != search_options) { @@ -59,9 +103,14 @@ void FuzzySearchWindow::render() { session.set_fuzzy_search(search_query, search_options); } ImGui::Separator(); + if (search_query.empty()) { ImGui::End(); return; + } else if (search_query.size() < 3) { + ImGui::Text("Please enter more characters to specify the search."); + ImGui::End(); + return; } size_t search_result_count = session.get_fuzzy_search_result_count(); if (search_result_count == 0) { diff --git a/src/gui/windows/fuzzy_search_window.hpp b/src/gui/windows/fuzzy_search_window.hpp index f338af7c..e4d1707c 100644 --- a/src/gui/windows/fuzzy_search_window.hpp +++ b/src/gui/windows/fuzzy_search_window.hpp @@ -3,7 +3,6 @@ #include -#include #include #include @@ -17,7 +16,7 @@ class FuzzySearchWindow { private: Session& session; std::string search_query; - std::array search_options; + FuzzySearchOptions search_options; }; } // namespace dnd diff --git a/src/runtime_measurement/measurer.cpp b/src/runtime_measurement/measurer.cpp index 43e6c352..09cab26b 100644 --- a/src/runtime_measurement/measurer.cpp +++ b/src/runtime_measurement/measurer.cpp @@ -7,6 +7,7 @@ #include #include #include +#include #include #include #include @@ -33,7 +34,9 @@ static constexpr std::array values_for_human_readable = { void Measurer::beginSession(const std::string& name, const std::string& filepath = "results.json") { session_start_time = std::chrono::system_clock::now(); - session = new MeasuringSession{name, filepath, {{"tspeciesEvents", nlohmann::json::array()}}}; + session = std::unique_ptr( + new MeasuringSession{.name = name, .filepath = filepath, .json = {{"tspeciesEvents", nlohmann::json::array()}}} + ); } void Measurer::endSession() { @@ -90,7 +93,6 @@ void Measurer::endSession() { output_stream.open(session->filepath); output_stream << std::setw(4) << session->json << std::flush; output_stream.close(); - delete session; session = nullptr; } @@ -118,7 +120,7 @@ void Timer::stop() { size_t thread_id = std::hash{}(std::this_thread::get_id()); - Measurer::get().writeProfile({name, start, end, thread_id}); + Measurer::get().writeProfile(TimerResult{name, start, end, thread_id}); stopped = true; } diff --git a/src/runtime_measurement/measurer.hpp b/src/runtime_measurement/measurer.hpp index 5d1438cc..6a1b3ee9 100644 --- a/src/runtime_measurement/measurer.hpp +++ b/src/runtime_measurement/measurer.hpp @@ -2,6 +2,7 @@ #define MEASURER_HPP_ #include +#include #include #include @@ -66,7 +67,7 @@ class Measurer { // a mutex to control writing access to the results json static std::mutex write_profile_mutex; // the current measuring session, nullptr if there is none - MeasuringSession* session; + std::unique_ptr session; // the start of the current session std::chrono::system_clock::time_point session_start_time; }; diff --git a/tests/testcore/validation/validation_data_mock.hpp b/tests/testcore/validation/validation_data_mock.hpp index 4dd39997..097a5584 100644 --- a/tests/testcore/validation/validation_data_mock.hpp +++ b/tests/testcore/validation/validation_data_mock.hpp @@ -4,7 +4,6 @@ #include #include -#include #include