Skip to content

Commit

Permalink
Fuzzy search (#103)
Browse files Browse the repository at this point in the history
  • Loading branch information
ftschirpke authored Aug 6, 2024
1 parent dd3b111 commit 72b1449
Show file tree
Hide file tree
Showing 22 changed files with 548 additions and 581 deletions.
2 changes: 1 addition & 1 deletion Compiler.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ function(set_compiler_flags TARGET)
else()
target_compile_options(${TARGET} PRIVATE
-pedantic -Wall -Wextra -Wpedantic -Werror -Wdisabled-optimization -Wcast-qual -Wold-style-cast
-Woverloaded-virtual -Wredundant-decls -Wsign-conversion -Wstrict-overflow=5 -Wzero-as-null-pointer-constant
-Woverloaded-virtual -Wredundant-decls -Wsign-conversion -Wstrict-overflow=5
)
if (CMAKE_CXX_COMPILER_ID STREQUAL "GNU")
target_compile_options(${TARGET} PRIVATE -Wlogical-op -Wnoexcept -Wstrict-null-sentinel -Wuseless-cast)
Expand Down
2 changes: 0 additions & 2 deletions src/core/content.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2,9 +2,7 @@

#include "content.hpp"

#include <sstream>
#include <string>
#include <unordered_map>

#include <fmt/format.h>

Expand Down
2 changes: 0 additions & 2 deletions src/core/content_library.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,6 @@
#include <string>

#include <core/models/content_piece.hpp>
#include <core/searching/fuzzy_search/trie_node.hpp>
#include <core/utils/types.hpp>

namespace dnd {
Expand All @@ -24,7 +23,6 @@ class ContentLibrary {
virtual size_t size() const = 0;
virtual OptCRef<T> get(size_t index) const = 0;
virtual OptCRef<T> get(const std::string& name) const = 0;
virtual const TrieNode<T>* get_fuzzy_search_trie_root() const = 0;
};

} // namespace dnd
Expand Down
1 change: 1 addition & 0 deletions src/core/models/character/decision.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@

#include "decision.hpp"

#include <cassert>
#include <set>
#include <string>
#include <utility>
Expand Down
29 changes: 0 additions & 29 deletions src/core/referencing_content_library.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,6 @@
#include <unordered_map>

#include <core/content_library.hpp>
#include <core/searching/fuzzy_search/trie.hpp>
#include <core/utils/string_manipulation.hpp>
#include <core/utils/types.hpp>

Expand All @@ -33,34 +32,13 @@ class ReferencingContentLibrary : public ContentLibrary<T> {
* @return reference to the inserted content piece, or std::nullopt if a content piece with that name already exists
*/
OptCRef<T> add(const T& content_piece);
const TrieNode<T>* get_fuzzy_search_trie_root() const override;
private:
void save_in_trie(const T* content_piece);

std::unordered_map<std::string, std::reference_wrapper<const T>> data;
Trie<T> trie;
};


// === IMPLEMENTATION ===

/**
 * @brief Registers a content piece in the fuzzy-search trie.
 *
 * The piece is inserted under its complete lowercased name and, additionally, under every suffix of that name
 * that begins directly after a separator character (' ', '_' or '-'). The scan for separators ends at the first
 * opening parenthesis, so separators located after a '(' do not create further entries.
 *
 * @param content_piece pointer to the content piece that should be findable through the trie
 */
template <typename T>
requires isContentPieceType<T>
void ReferencingContentLibrary<T>::save_in_trie(const T* content_piece) {
    std::string lowered = string_lowercase_copy(content_piece->get_name());
    trie.insert(lowered, content_piece);

    const std::string_view whole_name(lowered);
    for (size_t pos = 0; pos < whole_name.size(); ++pos) {
        const char current = whole_name[pos];
        if (current == '(') { // nothing from the parenthesised part gets its own entry
            break;
        }
        const bool is_separator = current == ' ' || current == '_' || current == '-';
        if (is_separator) {
            // everything following the separator becomes an additional key for the same piece
            trie.insert(whole_name.substr(pos + 1), content_piece);
        }
    }
}

template <typename T>
requires isContentPieceType<T>
bool ReferencingContentLibrary<T>::contains(const std::string& name) const {
Expand Down Expand Up @@ -111,19 +89,12 @@ OptCRef<T> ReferencingContentLibrary<T>::add(const T& content_piece) {
const std::string name = content_piece.get_name();
auto [it, was_inserted] = data.emplace(name, std::cref(content_piece));
if (was_inserted) {
save_in_trie(&it->second.get());
return std::cref(content_piece);
} else {
return std::nullopt;
}
}

/**
 * @brief Gives read-only access to the root node of this library's fuzzy-search trie
 * @return pointer to the root node as reported by the trie
 */
template <typename T>
requires isContentPieceType<T>
const TrieNode<T>* ReferencingContentLibrary<T>::get_fuzzy_search_trie_root() const {
    const TrieNode<T>* const root_node = trie.get_root();
    return root_node;
}

} // namespace dnd

#endif // REFERENCING_CONTENT_LIBRARY_HPP_
1 change: 1 addition & 0 deletions src/core/searching/fuzzy_search/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
# Source files of the fuzzy search that are added as PRIVATE sources to the target named by ${DND_CORE}.
target_sources(${DND_CORE}
PRIVATE
fuzzy_content_search.cpp
fuzzy_string_search.cpp
)

221 changes: 82 additions & 139 deletions src/core/searching/fuzzy_search/fuzzy_content_search.cpp
Original file line number Diff line number Diff line change
@@ -1,154 +1,97 @@
#include <cstdint>
#include <dnd_config.hpp>

#include "fuzzy_content_search.hpp"

#include <array>
#include <cassert>
#include <string>
#include <unordered_set>
#include <vector>

#include <core/content.hpp>
#include <core/models/character/character.hpp>
#include <core/models/class/class.hpp>
#include <core/models/content_piece.hpp>
#include <core/models/effects_provider/choosable.hpp>
#include <core/models/effects_provider/feature.hpp>
#include <core/models/item/item.hpp>
#include <core/models/species/species.hpp>
#include <core/models/spell/spell.hpp>
#include <core/models/subclass/subclass.hpp>
#include <core/models/subspecies/subspecies.hpp>
#include <core/utils/char_manipulation.hpp>
#include <core/searching/fuzzy_search/fuzzy_string_search.hpp>
#include <core/searching/search_result.hpp>

namespace dnd {

/**
 * @brief Creates a fuzzy content search with an empty query.
 *
 * Every search path starts out holding only the trie root of its content library; characters added to the query
 * later extend these paths.
 *
 * @param content the content whose libraries provide the trie roots
 */
FuzzyContentSearch::FuzzyContentSearch(const Content& content) {
    // reserve room for 40 query characters up front
    query.reserve(40);

    // pieces that stand on their own
    character_search_path.push(content.get_characters().get_fuzzy_search_trie_root());
    item_search_path.push(content.get_items().get_fuzzy_search_trie_root());
    spell_search_path.push(content.get_spells().get_fuzzy_search_trie_root());

    // species and classes together with their sub-variants
    species_search_path.push(content.get_species().get_fuzzy_search_trie_root());
    subspecies_search_path.push(content.get_subspecies().get_fuzzy_search_trie_root());
    class_search_path.push(content.get_classes().get_fuzzy_search_trie_root());
    subclass_search_path.push(content.get_subclasses().get_fuzzy_search_trie_root());

    // features and choosables
    feature_search_path.push(content.get_features().get_fuzzy_search_trie_root());
    choosable_search_path.push(content.get_choosables().get_fuzzy_search_trie_root());
}

/**
 * @brief Creates a fuzzy content search that already has a query applied.
 *
 * Delegates to the single-argument constructor and then feeds the initial query in character by character.
 *
 * @param content the content whose libraries provide the trie roots
 * @param initial_query the query the search starts with
 */
FuzzyContentSearch::FuzzyContentSearch(const Content& content, const std::string& initial_query)
    : FuzzyContentSearch(content) {
    for (const char query_char : initial_query) {
        add_character_to_query(query_char);
    }
}

/**
 * @brief Replaces the current search query with a new one while reusing as much of the existing state as possible.
 *
 * Only the part of the current query that differs from the new query is removed, and only the missing characters
 * are added. The stored query is kept in lowercase, so the shared prefix is determined case-insensitively with
 * respect to the new query; a new query that merely differs in letter case therefore causes no removals at all.
 *
 * @param new_query the query that should be active afterwards (an empty string clears the query)
 */
void FuzzyContentSearch::set_search_query(const std::string& new_query) {
    if (new_query.empty()) {
        clear_query();
        return;
    }

    // Shrink first so that every index of the current query is also a valid index into new_query.
    while (query.size() > new_query.size()) {
        remove_character_from_query();
    }
    assert(query.size() <= new_query.size());

    // The stored query only contains lowercased characters (add_character_to_query lowercases its input), so the
    // new query has to be lowercased for the comparison. Comparing against the raw character would cut the common
    // prefix short at the first uppercase letter and force needless removals and re-insertions.
    size_t common_length = 0;
    while (common_length < query.size() && query[common_length] == char_to_lowercase(new_query[common_length])) {
        ++common_length;
    }

    // Drop everything behind the shared prefix ...
    while (query.size() > common_length) {
        remove_character_from_query();
    }
    assert(query.size() == common_length);

    // ... and append the remainder of the new query.
    for (size_t i = common_length; i < new_query.size(); ++i) {
        add_character_to_query(new_query[i]);
    }

    // Post-condition: the stored query is the lowercased new query.
    assert(query.size() == new_query.size());
    for (size_t i = 0; i < query.size(); ++i) {
        assert(query[i] == char_to_lowercase(new_query[i]));
    }
}

/**
 * @brief Empties the search query by removing its characters one at a time, last character first
 */
void FuzzyContentSearch::clear_query() {
    // each call removes exactly one character, so one call per stored character empties the query
    for (size_t remaining = query.size(); remaining > 0; --remaining) {
        remove_character_from_query();
    }
}

/**
 * @brief Appends one character to the search query and advances every search path accordingly.
 *
 * The character is lowercased before it is stored and before it is handed to the search paths.
 *
 * @param c the character to append
 */
void FuzzyContentSearch::add_character_to_query(char c) {
    const char lowered = char_to_lowercase(c);
    query.push_back(lowered);

    // pieces that stand on their own
    character_search_path.push_top_child(lowered);
    item_search_path.push_top_child(lowered);
    spell_search_path.push_top_child(lowered);

    // species and classes together with their sub-variants
    species_search_path.push_top_child(lowered);
    subspecies_search_path.push_top_child(lowered);
    class_search_path.push_top_child(lowered);
    subclass_search_path.push_top_child(lowered);

    // features and choosables
    feature_search_path.push_top_child(lowered);
    choosable_search_path.push_top_child(lowered);
}

/**
 * @brief Removes the last character of the search query and steps every search path back by one element.
 *
 * Does nothing when the query is already empty. After each step back the path is asserted to still hold at least
 * one element.
 */
void FuzzyContentSearch::remove_character_from_query() {
    if (!query.empty()) {
        query.pop_back();

        // pieces that stand on their own
        character_search_path.pop();
        assert(character_search_path.size() > 0);
        item_search_path.pop();
        assert(item_search_path.size() > 0);
        spell_search_path.pop();
        assert(spell_search_path.size() > 0);

        // species and classes together with their sub-variants
        species_search_path.pop();
        assert(species_search_path.size() > 0);
        subspecies_search_path.pop();
        assert(subspecies_search_path.size() > 0);
        class_search_path.pop();
        assert(class_search_path.size() > 0);
        subclass_search_path.pop();
        assert(subclass_search_path.size() > 0);

        // features and choosables
        feature_search_path.pop();
        assert(feature_search_path.size() > 0);
        choosable_search_path.pop();
        assert(choosable_search_path.size() > 0);
    }
}

std::unordered_set<const ContentPiece*> FuzzyContentSearch::get_results(const std::array<bool, 9>& options) const {
std::vector<SearchResult> fuzzy_search_content(
const Content& content, const std::string& search_query, const FuzzySearchOptions& options
) {
DND_MEASURE_FUNCTION();
std::unordered_set<const ContentPiece*> results;

if (options[0]) {
character_search_path.insert_top_successors_into(results);
}
if (options[1]) {
species_search_path.insert_top_successors_into(results);
}
if (options[2]) {
class_search_path.insert_top_successors_into(results);
}
if (options[3]) {
subspecies_search_path.insert_top_successors_into(results);
}
if (options[4]) {
subclass_search_path.insert_top_successors_into(results);
}
if (options[5]) {
item_search_path.insert_top_successors_into(results);
}
if (options[6]) {
spell_search_path.insert_top_successors_into(results);
}
if (options[7]) {
feature_search_path.insert_top_successors_into(results);
}
if (options[8]) {
choosable_search_path.insert_top_successors_into(results);
std::vector<SearchResult> results;

int64_t min_match_score = 0;

if (options.search_characters) {
for (const auto& [character_name, character] : content.get_characters().get_all()) {
int64_t match_score = fuzzy_match_string(search_query, character_name);
if (match_score > min_match_score) {
results.emplace_back(&character, match_score);
}
}
}
if (options.search_species) {
for (const auto& [species_name, species] : content.get_species().get_all()) {
int64_t match_score = fuzzy_match_string(search_query, species_name);
if (match_score > min_match_score) {
results.emplace_back(&species, match_score);
}
}
}
if (options.search_classes) {
for (const auto& [class_name, cls] : content.get_classes().get_all()) {
int64_t match_score = fuzzy_match_string(search_query, class_name);
if (match_score > min_match_score) {
results.emplace_back(&cls, match_score);
}
}
}
if (options.search_subspecies) {
for (const auto& [subspecies_name, subspecies] : content.get_subspecies().get_all()) {
int64_t match_score = fuzzy_match_string(search_query, subspecies_name);
if (match_score > min_match_score) {
results.emplace_back(&subspecies, match_score);
}
}
}
if (options.search_subclasses) {
for (const auto& [subclass_name, subclass] : content.get_subclasses().get_all()) {
int64_t match_score = fuzzy_match_string(search_query, subclass_name);
if (match_score > min_match_score) {
results.emplace_back(&subclass, match_score);
}
}
}
if (options.search_items) {
for (const auto& [item_name, item] : content.get_items().get_all()) {
int64_t match_score = fuzzy_match_string(search_query, item_name);
if (match_score > min_match_score) {
results.emplace_back(&item, match_score);
}
}
}
if (options.search_spells) {
for (const auto& [spell_name, spell] : content.get_spells().get_all()) {
int64_t match_score = fuzzy_match_string(search_query, spell_name);
if (match_score > min_match_score) {
results.emplace_back(&spell, match_score);
}
}
}
if (options.search_features) {
for (const auto& [feature_name, feature] : content.get_features().get_all()) {
int64_t match_score = fuzzy_match_string(search_query, feature_name);
if (match_score > min_match_score) {
results.emplace_back(&feature.get(), match_score);
}
}
}
if (options.search_choosables) {
for (const auto& [choosable_name, choosable] : content.get_choosables().get_all()) {
int64_t match_score = fuzzy_match_string(search_query, choosable_name);
if (match_score > min_match_score) {
results.emplace_back(&choosable, match_score);
}
}
}

return results;
Expand Down
Loading

0 comments on commit 72b1449

Please sign in to comment.