From 159a8a113f5324b46571805ac9f1cc19f2b0ee85 Mon Sep 17 00:00:00 2001 From: Stsiapan Bahrytsevich Date: Wed, 30 Oct 2024 16:35:51 +0100 Subject: [PATCH 1/6] fix(search_family): Fix LOAD fields parsing in the FT.AGGREGATE and FT.SEARCH commands fixes dragonflydb#3989 Signed-off-by: Stsiapan Bahrytsevich --- src/core/search/search.cc | 14 +- src/core/search/search.h | 9 +- src/server/search/doc_accessors.cc | 11 +- src/server/search/doc_index.cc | 51 ++++-- src/server/search/doc_index.h | 104 +++++++++++- src/server/search/search_family.cc | 36 ++++- src/server/search/search_family_test.cc | 200 ++++++++++++++++++++++++ 7 files changed, 391 insertions(+), 34 deletions(-) diff --git a/src/core/search/search.cc b/src/core/search/search.cc index 3faca2c4e1db..123f8fa2c695 100644 --- a/src/core/search/search.cc +++ b/src/core/search/search.cc @@ -501,6 +501,12 @@ string_view Schema::LookupAlias(string_view alias) const { return alias; } +string_view Schema::LookupIdentifier(string_view identifier) const { + if (auto it = fields.find(identifier); it != fields.end()) + return it->second.short_name; + return identifier; +} + IndicesOptions::IndicesOptions() { static absl::flat_hash_set kDefaultStopwords{ "a", "is", "the", "an", "and", "are", "as", "at", "be", "but", "by", @@ -646,10 +652,14 @@ const Schema& FieldIndices::GetSchema() const { return schema_; } -vector> FieldIndices::ExtractStoredValues(DocId doc) const { +vector> FieldIndices::ExtractStoredValues( + DocId doc, const absl::flat_hash_map& aliases) const { vector> out; for (const auto& [ident, index] : sort_indices_) { - out.emplace_back(ident, index->Lookup(doc)); + const auto& it = aliases.find(ident); + const auto& name = it == aliases.end() ? schema_.LookupIdentifier(ident) : it->second; + + out.emplace_back(name, index->Lookup(doc)); } return out; } diff --git a/src/core/search/search.h b/src/core/search/search.h index 85798da4779e..08ef30c9b21b 100644 --- a/src/core/search/search.h +++ b/src/core/search/search.h @@ -60,6 +60,9 @@ struct Schema { // Return identifier for alias if found, otherwise return passed value std::string_view LookupAlias(std::string_view alias) const; + + // Return alias for identifier if found, otherwise return passed value + std::string_view LookupIdentifier(std::string_view identifier) const; }; struct IndicesOptions { @@ -89,7 +92,11 @@ class FieldIndices { const Schema& GetSchema() const; // Extract values stored in sort indices - std::vector> ExtractStoredValues(DocId doc) const; + // aliases are specified in addition to the aliases in search::Schema + std::vector> ExtractStoredValues( + DocId doc, + const absl::flat_hash_map& + aliases) const; absl::flat_hash_set GetSortIndiciesFields() const; diff --git a/src/server/search/doc_accessors.cc b/src/server/search/doc_accessors.cc index cfd7c5e16ce5..09732085d214 100644 --- a/src/server/search/doc_accessors.cc +++ b/src/server/search/doc_accessors.cc @@ -83,7 +83,10 @@ FieldValue ExtractSortableValueFromJson(const search::Schema& schema, string_vie SearchDocData BaseAccessor::Serialize( const search::Schema& schema, absl::Span> fields) const { SearchDocData out{}; - for (const auto& [fident, fname] : fields) { + for (const auto& field : fields) { + const auto& fident = field.GetIdentifier(schema, false); + const auto& fname = field.GetShortName(schema); + auto field_value = ExtractSortableValue(schema, fident, absl::StrJoin(GetStrings(fident).value(), ",")); if (field_value) { @@ -348,14 +351,16 @@ JsonAccessor::JsonPathContainer* JsonAccessor::GetPath(std::string_view field) c SearchDocData JsonAccessor::Serialize(const search::Schema& schema) const { SearchFieldsList fields{}; for (const auto& [fname, fident] : schema.field_names) - fields.emplace_back(fident, fname); + fields.emplace_back(fident, NameType::kIdentifier, fname); return Serialize(schema, fields); } SearchDocData JsonAccessor::Serialize( const search::Schema& schema, absl::Span> fields) const { SearchDocData out{}; - for (const auto& [ident, name] : fields) { + for (const auto& field : fields) { + const auto& ident = field.GetIdentifier(schema, true); + const auto& name = field.GetShortName(schema); if (auto* path = GetPath(ident); path) { if (auto res = path->Evaluate(json_); !res.empty()) { auto field_value = ExtractSortableValueFromJson(schema, ident, res[0]); diff --git a/src/server/search/doc_index.cc b/src/server/search/doc_index.cc index e24acceeda45..9cc6328ded70 100644 --- a/src/server/search/doc_index.cc +++ b/src/server/search/doc_index.cc @@ -60,8 +60,8 @@ bool SerializedSearchDoc::operator>=(const SerializedSearchDoc& other) const { return this->score >= other.score; } -bool SearchParams::ShouldReturnField(std::string_view field) const { - auto cb = [field](const auto& entry) { return entry.first == field; }; +bool SearchParams::ShouldReturnField(std::string_view alias) const { + auto cb = [alias](const auto& entry) { return entry.GetShortName() == alias; }; return !return_fields || any_of(return_fields->begin(), return_fields->end(), cb); } @@ -224,12 +224,13 @@ bool ShardDocIndex::Matches(string_view key, unsigned obj_code) const { return base_->Matches(key, obj_code); } -SearchFieldsList ToSV(const std::optional& fields) { +SearchFieldsList ToSV(const search::Schema& schema, + const std::optional& fields) { SearchFieldsList sv_fields; if (fields) { sv_fields.reserve(fields->size()); - for (const auto& [fident, fname] : fields.value()) { - sv_fields.emplace_back(fident, fname); + for (const auto& field : fields.value()) { + sv_fields.emplace_back(field); } } return sv_fields; @@ -243,8 +244,8 @@ SearchResult ShardDocIndex::Search(const OpArgs& op_args, const SearchParams& pa if (!search_results.error.empty()) return SearchResult{facade::ErrorReply{std::move(search_results.error)}}; - SearchFieldsList fields_to_load = - ToSV(params.ShouldReturnAllFields() ? params.load_fields : params.return_fields); + SearchFieldsList fields_to_load = ToSV( + base_->schema, params.ShouldReturnAllFields() ? params.load_fields : params.return_fields); vector out; out.reserve(search_results.ids.size()); @@ -294,8 +295,19 @@ vector ShardDocIndex::SearchForAggregator( if (!search_results.error.empty()) return {}; - SearchFieldsList fields_to_load = - GetFieldsToLoad(params.load_fields, indices_->GetSortIndiciesFields()); + auto sort_indicies_fields = indices_->GetSortIndiciesFields(); + SearchFieldsList fields_to_load = GetFieldsToLoad(params.load_fields, sort_indicies_fields); + + // aliases for ExtractStoredValues + absl::flat_hash_map sort_indicies_aliases; + if (params.load_fields) { + for (const auto& field : params.load_fields.value()) { + auto ident = field.GetIdentifier(base_->schema); + if (sort_indicies_fields.contains(ident)) { + sort_indicies_aliases[ident] = field.GetShortName(); + } + } + } vector> out; for (DocId doc : search_results.ids) { @@ -306,7 +318,7 @@ vector ShardDocIndex::SearchForAggregator( continue; auto accessor = GetAccessor(op_args.db_cntx, (*it)->second); - auto extracted = indices_->ExtractStoredValues(doc); + auto extracted = indices_->ExtractStoredValues(doc, sort_indicies_aliases); SearchDocData loaded = accessor->Serialize(base_->schema, fields_to_load); @@ -320,25 +332,32 @@ vector ShardDocIndex::SearchForAggregator( SearchFieldsList ShardDocIndex::GetFieldsToLoad( const std::optional& load_fields, const absl::flat_hash_set& skip_fields) const { - // identifier to short name - absl::flat_hash_map unique_fields; + absl::flat_hash_map> unique_fields; unique_fields.reserve(base_->schema.field_names.size()); for (const auto& [fname, fident] : base_->schema.field_names) { if (!skip_fields.contains(fident)) { - unique_fields[fident] = fname; + unique_fields[fident] = {std::string_view{fident}, NameType::kIdentifier, + std::string_view{fname}}; } } if (load_fields) { - for (const auto& [fident, fname] : load_fields.value()) { + for (const auto& field : load_fields.value()) { + const auto& fident = field.GetIdentifier(base_->schema); if (!skip_fields.contains(fident)) { - unique_fields[fident] = fname; + unique_fields[fident] = field; } } } - return {unique_fields.begin(), unique_fields.end()}; + SearchFieldsList fields; + fields.reserve(unique_fields.size()); + for (auto& [_, field] : unique_fields) { + fields.emplace_back(std::move(field)); + } + + return fields; } DocIndexInfo ShardDocIndex::GetInfo() const { diff --git a/src/server/search/doc_index.h b/src/server/search/doc_index.h index 1c775f30c245..1e163ff33c1a 100644 --- a/src/server/search/doc_index.h +++ b/src/server/search/doc_index.h @@ -52,7 +52,102 @@ struct SearchResult { std::optional error; }; -template using SearchField = std::pair; +enum class NameType : uint8_t { kIdentifier, kShortName, kUndefined }; + +template class SearchField { + private: + using SingleName = std::pair; + + static bool IsJsonPath(const T& name) { + if (name.size() < 2) { + return false; + } + return name.front() == '$' && (name[1] == '.' || name[1] == '['); + } + + public: + SearchField() = default; + + SearchField(T name, NameType name_type) : name_(std::make_pair(std::move(name), name_type)) { + } + + SearchField(T name, NameType name_type, T new_alias) + : name_(std::make_pair(std::move(name), name_type)), new_alias_(std::move(new_alias)) { + } + + template >> + explicit SearchField(const SearchField& other) + : name_(std::make_pair(T{other.name_.first}, other.name_.second)) { + if (other.HasNewAlias()) { + new_alias_ = T{other.new_alias_.value()}; + } else { + new_alias_.reset(); + } + } + + template >> + SearchField& operator=(const SearchField& other) { + name_ = std::make_pair(T{other.name_.first}, other.name_.second); + if (other.HasNewAlias()) { + new_alias_ = T{other.new_alias_.value()}; + } else { + new_alias_.reset(); + } + return *this; + } + + ~SearchField() = default; + + std::string_view GetIdentifier(const search::Schema& schema) const { + return GetIdentifier(schema, [&](const SingleName& single_name) { + return single_name.second == NameType::kIdentifier; + }); + } + + std::string_view GetIdentifier(const search::Schema& schema, bool is_json_field) const { + return GetIdentifier(schema, [&](const SingleName& single_name) { + return single_name.second == NameType::kIdentifier || + (is_json_field && IsJsonPath(single_name.first)); + }); + } + + std::string_view GetShortName() const { + if (HasNewAlias()) { + return new_alias_.value(); + } + return name_.first; + } + + std::string_view GetShortName(const search::Schema& schema) const { + if (HasNewAlias()) { + return new_alias_.value(); + } + + if (name_.second == NameType::kShortName) { + return name_.first; + } + return schema.LookupIdentifier(std::string_view{name_.first}); + } + + private: + template + std::string_view GetIdentifier(const search::Schema& schema, Callback is_identifier) const { + if (is_identifier(name_)) { + return name_.first; + } + return schema.LookupAlias(std::string_view{name_.first}); + } + + bool HasNewAlias() const { + return new_alias_.has_value(); + } + + template friend class SearchField; + + private: + SingleName name_; + std::optional new_alias_; +}; using SearchFieldsList = std::vector>; using OwnedSearchFieldsList = std::vector>; @@ -88,7 +183,7 @@ struct SearchParams { return return_fields && return_fields->empty(); } - bool ShouldReturnField(std::string_view field) const; + bool ShouldReturnField(std::string_view alias) const; }; struct AggregateParams { @@ -169,8 +264,9 @@ class ShardDocIndex { io::Result GetTagVals(std::string_view field) const; private: - // Returns the fields that are the union of the already indexed fields and load_fields, excluding - // skip_fields Load_fields should not be destroyed while the result of this function is being used + /* Returns the fields that are the union of the already indexed fields and load_fields, excluding + skip_fields. + Load_fields should not be destroyed while the result of this function is being used */ SearchFieldsList GetFieldsToLoad(const std::optional& load_fields, const absl::flat_hash_set& skip_fields) const; diff --git a/src/server/search/search_family.cc b/src/server/search/search_family.cc index 8f41f33f97c6..c7d17d10e303 100644 --- a/src/server/search/search_family.cc +++ b/src/server/search/search_family.cc @@ -183,9 +183,13 @@ optional ParseSchemaOrReply(DocIndex::DataType type, CmdArgParse #pragma GCC diagnostic pop #endif +bool StartsWithAtSign(std::string_view field) { + return !field.empty() && field.front() == '@'; +} + std::string_view ParseField(CmdArgParser* parser) { std::string_view field = parser->Next(); - if (!field.empty() && field.front() == '@') { + if (StartsWithAtSign(field)) { field.remove_prefix(1); // remove leading @ if exists } return field; @@ -193,7 +197,7 @@ std::string_view ParseField(CmdArgParser* parser) { std::string_view ParseFieldWithAtSign(CmdArgParser* parser) { std::string_view field = parser->Next(); - if (!field.empty() && field.front() == '@') { + if (StartsWithAtSign(field)) { field.remove_prefix(1); // remove leading @ } else { // Temporary warning until we can throw an error @@ -204,15 +208,25 @@ std::string_view ParseFieldWithAtSign(CmdArgParser* parser) { } void ParseLoadFields(CmdArgParser* parser, std::optional* load_fields) { + // TODO: Change to num_strings. In Redis strings number is expected. For example: LOAD 3 $.a AS a size_t num_fields = parser->Next(); if (!load_fields->has_value()) { load_fields->emplace(); } while (num_fields--) { - string_view field = ParseField(parser); - string_view alias = parser->Check("AS") ? parser->Next() : field; - load_fields->value().emplace_back(field, alias); + string_view str = parser->Next(); + + if (StartsWithAtSign(str)) { + str.remove_prefix(1); // remove leading @ + } + + if (parser->Check("AS")) { + load_fields->value().emplace_back(std::string{str}, NameType::kShortName, + std::string{parser->Next()}); + } else { + load_fields->value().emplace_back(std::string{str}, NameType::kShortName); + } } } @@ -248,12 +262,18 @@ optional ParseSearchParamsOrReply(CmdArgParser* parser, SinkReplyB } // RETURN {num} [{ident} AS {name}...] + /* TODO: Change to num_strings. In Redis strings number is expected. For example: RETURN 3 $.a AS a */ size_t num_fields = parser->Next(); params.return_fields.emplace(); while (params.return_fields->size() < num_fields) { - string_view ident = parser->Next(); - string_view alias = parser->Check("AS") ? parser->Next() : ident; - params.return_fields->emplace_back(ident, alias); + std::string_view str = parser->Next(); + + if (parser->Check("AS")) { + params.return_fields->emplace_back(std::string{str}, NameType::kShortName, + std::string{parser->Next()}); + } else { + params.return_fields->emplace_back(std::string{str}, NameType::kShortName); + } } } else if (parser->Check("NOCONTENT")) { // NOCONTENT params.load_fields.emplace(); diff --git a/src/server/search/search_family_test.cc b/src/server/search/search_family_test.cc index 804a3ed13b9e..2a22196855c2 100644 --- a/src/server/search/search_family_test.cc +++ b/src/server/search/search_family_test.cc @@ -1425,4 +1425,204 @@ TEST_F(SearchFamilyTest, WrongVectorFieldType) { EXPECT_THAT(resp, AreDocIds("j6", "j7", "j1", "j4")); } +TEST_F(SearchFamilyTest, SearchLoadReturnJson) { + Run({"JSON.SET", "j1", ".", R"({"a":"one"})"}); + Run({"JSON.SET", "j2", ".", R"({"a":"two"})"}); + + auto resp = Run({"FT.CREATE", "i1", "ON", "JSON", "SCHEMA", "$.a", "AS", "a", "TEXT"}); + EXPECT_EQ(resp, "OK"); + + // Search with RETURN $.a + resp = Run({"FT.SEARCH", "i1", "*", "RETURN", "1", "$.a"}); + EXPECT_THAT(resp, IsMapWithSize("j1", IsMap("$.a", "\"one\""), "j2", IsMap("$.a", "\"two\""))); + + // Search with RETURN a + resp = Run({"FT.SEARCH", "i1", "*", "RETURN", "1", "a"}); + EXPECT_THAT(resp, IsMapWithSize("j1", IsMap("a", "\"one\""), "j2", IsMap("a", "\"two\""))); + + // Search with RETURN @a + resp = Run({"FT.SEARCH", "i1", "*", "RETURN", "1", "@a"}); + EXPECT_THAT(resp, IsMapWithSize("j1", IsMap(), "j2", IsMap())); + + // Search with RETURN $.a AS vvv + resp = Run({"FT.SEARCH", "i1", "*", "RETURN", "1", "$.a", "AS", "vvv"}); + EXPECT_THAT(resp, IsMapWithSize("j1", IsMap("vvv", "\"one\""), "j2", IsMap("vvv", "\"two\""))); + + // Search with RETURN a AS vvv + resp = Run({"FT.SEARCH", "i1", "*", "RETURN", "1", "a", "AS", "vvv"}); + EXPECT_THAT(resp, IsMapWithSize("j1", IsMap("vvv", "\"one\""), "j2", IsMap("vvv", "\"two\""))); + + // Search with RETURN @a AS vvv + resp = Run({"FT.SEARCH", "i1", "*", "RETURN", "1", "@a", "AS", "vvv"}); + EXPECT_THAT(resp, IsMapWithSize("j1", IsMap(), "j2", IsMap())); + + // Search with LOAD $.a + resp = Run({"FT.SEARCH", "i1", "*", "LOAD", "1", "$.a"}); + EXPECT_THAT(resp, IsMapWithSize("j1", IsMap("$.a", "\"one\"", "$", R"({"a":"one"})"), "j2", + IsMap("$.a", "\"two\"", "$", R"({"a":"two"})"))); + + // Search with LOAD a + resp = Run({"FT.SEARCH", "i1", "*", "LOAD", "1", "a"}); + EXPECT_THAT(resp, IsMapWithSize("j1", IsMap("a", "\"one\"", "$", R"({"a":"one"})"), "j2", + IsMap("a", "\"two\"", "$", R"({"a":"two"})"))); + + // Search with LOAD @a + resp = Run({"FT.SEARCH", "i1", "*", "LOAD", "1", "@a"}); + EXPECT_THAT(resp, IsMapWithSize("j1", IsMap("a", "\"one\"", "$", R"({"a":"one"})"), "j2", + IsMap("a", "\"two\"", "$", R"({"a":"two"})"))); + + // Search with LOAD $.a AS vvv + resp = Run({"FT.SEARCH", "i1", "*", "LOAD", "1", "$.a", "AS", "vvv"}); + EXPECT_THAT(resp, IsMapWithSize("j1", IsMap("$", R"({"a":"one"})", "vvv", "\"one\""), "j2", + IsMap("$", R"({"a":"two"})", "vvv", "\"two\""))); + + // Search with LOAD a AS vvv + resp = Run({"FT.SEARCH", "i1", "*", "LOAD", "1", "a", "AS", "vvv"}); + EXPECT_THAT(resp, IsMapWithSize("j1", IsMap("$", R"({"a":"one"})", "vvv", "\"one\""), "j2", + IsMap("$", R"({"a":"two"})", "vvv", "\"two\""))); + + // Search with LOAD @a AS vvv + resp = Run({"FT.SEARCH", "i1", "*", "LOAD", "1", "@a", "AS", "vvv"}); + EXPECT_THAT(resp, IsMapWithSize("j1", IsMap("$", R"({"a":"one"})", "vvv", "\"one\""), "j2", + IsMap("$", R"({"a":"two"})", "vvv", "\"two\""))); + + /* Test another name */ + + resp = Run({"FT.CREATE", "i2", "ON", "JSON", "SCHEMA", "$.a", "AS", "nnn", "TEXT"}); + EXPECT_EQ(resp, "OK"); + + // Search with RETURN nnn + resp = Run({"FT.SEARCH", "i2", "*", "RETURN", "1", "nnn"}); + EXPECT_THAT(resp, IsMapWithSize("j1", IsMap("nnn", "\"one\""), "j2", IsMap("nnn", "\"two\""))); + + // Search with RETURN @nnn + resp = Run({"FT.SEARCH", "i2", "*", "RETURN", "1", "@nnn"}); + EXPECT_THAT(resp, IsMapWithSize("j1", IsMap(), "j2", IsMap())); + + // Search with RETURN a + resp = Run({"FT.SEARCH", "i2", "*", "RETURN", "1", "a"}); + EXPECT_THAT(resp, IsMapWithSize("j1", IsMap(), "j2", IsMap())); + + // Search with RETURN @a + resp = Run({"FT.SEARCH", "i2", "*", "RETURN", "1", "@a"}); + EXPECT_THAT(resp, IsMapWithSize("j1", IsMap(), "j2", IsMap())); + + // Search with LOAD nnn + resp = Run({"FT.SEARCH", "i2", "*", "LOAD", "1", "nnn"}); + EXPECT_THAT(resp, IsMapWithSize("j1", IsMap("nnn", "\"one\"", "$", R"({"a":"one"})"), "j2", + IsMap("nnn", "\"two\"", "$", R"({"a":"two"})"))); + + // Search with LOAD @nnn + resp = Run({"FT.SEARCH", "i2", "*", "LOAD", "1", "@nnn"}); + EXPECT_THAT(resp, IsMapWithSize("j1", IsMap("nnn", "\"one\"", "$", R"({"a":"one"})"), "j2", + IsMap("nnn", "\"two\"", "$", R"({"a":"two"})"))); + + // Search with LOAD a + resp = Run({"FT.SEARCH", "i2", "*", "LOAD", "1", "a"}); + EXPECT_THAT( + resp, IsMapWithSize("j1", IsMap("$", R"({"a":"one"})"), "j2", IsMap("$", R"({"a":"two"})"))); + + // Search with LOAD @a + resp = Run({"FT.SEARCH", "i2", "*", "LOAD", "1", "@a"}); + EXPECT_THAT( + resp, IsMapWithSize("j1", IsMap("$", R"({"a":"one"})"), "j2", IsMap("$", R"({"a":"two"})"))); +} + +TEST_F(SearchFamilyTest, SearchLoadReturnHash) { + Run({"HSET", "h1", "a", "one"}); + Run({"HSET", "h2", "a", "two"}); + + auto resp = Run({"FT.CREATE", "i1", "ON", "HASH", "SCHEMA", "a", "TEXT"}); + EXPECT_EQ(resp, "OK"); + + // Search with RETURN $.a + resp = Run({"FT.SEARCH", "i1", "*", "RETURN", "1", "$.a"}); + EXPECT_THAT(resp, IsMapWithSize("h2", IsMap(), "h1", IsMap())); + + // Search with RETURN a + resp = Run({"FT.SEARCH", "i1", "*", "RETURN", "1", "a"}); + EXPECT_THAT(resp, IsMapWithSize("h2", IsMap("a", "two"), "h1", IsMap("a", "one"))); + + // Search with RETURN @a + resp = Run({"FT.SEARCH", "i1", "*", "RETURN", "1", "@a"}); + EXPECT_THAT(resp, IsMapWithSize("h2", IsMap(), "h1", IsMap())); + + // Search with RETURN $.a AS vvv + resp = Run({"FT.SEARCH", "i1", "*", "RETURN", "1", "$.a", "AS", "vvv"}); + EXPECT_THAT(resp, IsMapWithSize("h2", IsMap(), "h1", IsMap())); + + // Search with RETURN a AS vvv + resp = Run({"FT.SEARCH", "i1", "*", "RETURN", "1", "a", "AS", "vvv"}); + EXPECT_THAT(resp, IsMapWithSize("h2", IsMap("vvv", "two"), "h1", IsMap("vvv", "one"))); + + // Search with RETURN @a AS vvv + resp = Run({"FT.SEARCH", "i1", "*", "RETURN", "1", "@a", "AS", "vvv"}); + EXPECT_THAT(resp, IsMapWithSize("h2", IsMap(), "h1", IsMap())); + + // Search with LOAD $.a + resp = Run({"FT.SEARCH", "i1", "*", "LOAD", "1", "$.a"}); + EXPECT_THAT(resp, IsMapWithSize("h2", IsMap("a", "two"), "h1", IsMap("a", "one"))); + + // Search with LOAD a + resp = Run({"FT.SEARCH", "i1", "*", "LOAD", "1", "a"}); + EXPECT_THAT(resp, IsMapWithSize("h2", IsMap("a", "two"), "h1", IsMap("a", "one"))); + + // Search with LOAD @a + resp = Run({"FT.SEARCH", "i1", "*", "LOAD", "1", "@a"}); + EXPECT_THAT(resp, IsMapWithSize("h2", IsMap("a", "two"), "h1", IsMap("a", "one"))); + + // Search with LOAD $.a AS vvv + resp = Run({"FT.SEARCH", "i1", "*", "LOAD", "1", "$.a", "AS", "vvv"}); + EXPECT_THAT(resp, IsMapWithSize("h2", IsMap("a", "two"), "h1", IsMap("a", "one"))); + + // Search with LOAD a AS vvv + resp = Run({"FT.SEARCH", "i1", "*", "LOAD", "1", "a", "AS", "vvv"}); + EXPECT_THAT(resp, IsMapWithSize("h2", IsMap("vvv", "two", "a", "two"), "h1", + IsMap("vvv", "one", "a", "one"))); + + // Search with LOAD @a AS vvv + resp = Run({"FT.SEARCH", "i1", "*", "LOAD", "1", "@a", "AS", "vvv"}); + EXPECT_THAT(resp, IsMapWithSize("h2", IsMap("vvv", "two", "a", "two"), "h1", + IsMap("vvv", "one", "a", "one"))); + + /* Test another name */ + + resp = Run({"FT.CREATE", "i2", "ON", "HASH", "SCHEMA", "a", "AS", "nnn", "TEXT"}); + EXPECT_EQ(resp, "OK"); + + // Search with RETURN nnn + resp = Run({"FT.SEARCH", "i2", "*", "RETURN", "1", "nnn"}); + EXPECT_THAT(resp, IsMapWithSize("h2", IsMap("nnn", "two"), "h1", IsMap("nnn", "one"))); + + // Search with RETURN @nnn + resp = Run({"FT.SEARCH", "i2", "*", "RETURN", "1", "@nnn"}); + EXPECT_THAT(resp, IsMapWithSize("h1", IsMap(), "h2", IsMap())); + + // Search with RETURN a + resp = Run({"FT.SEARCH", "i2", "*", "RETURN", "1", "a"}); + EXPECT_THAT(resp, IsMapWithSize("h2", IsMap("a", "two"), "h1", IsMap("a", "one"))); + + // Search with RETURN @a + resp = Run({"FT.SEARCH", "i2", "*", "RETURN", "1", "@a"}); + EXPECT_THAT(resp, IsMapWithSize("h1", IsMap(), "h2", IsMap())); + + // Search with LOAD nnn + resp = Run({"FT.SEARCH", "i2", "*", "LOAD", "1", "nnn"}); + EXPECT_THAT(resp, IsMapWithSize("h2", IsMap("nnn", "two", "a", "two"), "h1", + IsMap("nnn", "one", "a", "one"))); + + // Search with LOAD @nnn + resp = Run({"FT.SEARCH", "i2", "*", "LOAD", "1", "@nnn"}); + EXPECT_THAT(resp, IsMapWithSize("h2", IsMap("nnn", "two", "a", "two"), "h1", + IsMap("nnn", "one", "a", "one"))); + + // Search with LOAD a + resp = Run({"FT.SEARCH", "i2", "*", "LOAD", "1", "a"}); + EXPECT_THAT(resp, IsMapWithSize("h2", IsMap("a", "two"), "h1", IsMap("a", "one"))); + + // Search with LOAD @a + resp = Run({"FT.SEARCH", "i2", "*", "LOAD", "1", "@a"}); + EXPECT_THAT(resp, IsMapWithSize("h2", IsMap("a", "two"), "h1", IsMap("a", "one"))); +} + } // namespace dfly From 0e0e42edb69f35ccfec366ac53ce85d0ee44ac16 Mon Sep 17 00:00:00 2001 From: Stepan Bagritsevich Date: Sun, 3 Nov 2024 16:47:33 +0100 Subject: [PATCH 2/6] refactor: address comments Signed-off-by: Stepan Bagritsevich --- src/server/search/doc_accessors.cc | 10 +-- src/server/search/doc_accessors.h | 4 +- src/server/search/doc_index.cc | 13 ++-- src/server/search/doc_index.h | 100 +++++++++++------------------ src/server/search/search_family.cc | 15 +++-- 5 files changed, 60 insertions(+), 82 deletions(-) diff --git a/src/server/search/doc_accessors.cc b/src/server/search/doc_accessors.cc index 09732085d214..a7f02f00e906 100644 --- a/src/server/search/doc_accessors.cc +++ b/src/server/search/doc_accessors.cc @@ -80,8 +80,8 @@ FieldValue ExtractSortableValueFromJson(const search::Schema& schema, string_vie } // namespace -SearchDocData BaseAccessor::Serialize( - const search::Schema& schema, absl::Span> fields) const { +SearchDocData BaseAccessor::Serialize(const search::Schema& schema, + absl::Span fields) const { SearchDocData out{}; for (const auto& field : fields) { const auto& fident = field.GetIdentifier(schema, false); @@ -351,12 +351,12 @@ JsonAccessor::JsonPathContainer* JsonAccessor::GetPath(std::string_view field) c SearchDocData JsonAccessor::Serialize(const search::Schema& schema) const { SearchFieldsList fields{}; for (const auto& [fname, fident] : schema.field_names) - fields.emplace_back(fident, NameType::kIdentifier, fname); + fields.emplace_back(StringOrView::FromView(fident), false, StringOrView::FromView(fname)); return Serialize(schema, fields); } -SearchDocData JsonAccessor::Serialize( - const search::Schema& schema, absl::Span> fields) const { +SearchDocData JsonAccessor::Serialize(const search::Schema& schema, + absl::Span fields) const { SearchDocData out{}; for (const auto& field : fields) { const auto& ident = field.GetIdentifier(schema, true); diff --git a/src/server/search/doc_accessors.h b/src/server/search/doc_accessors.h index 4a6dea1c0c71..f4f0baca64b2 100644 --- a/src/server/search/doc_accessors.h +++ b/src/server/search/doc_accessors.h @@ -30,7 +30,7 @@ struct BaseAccessor : public search::DocumentAccessor { // Serialize selected fields virtual SearchDocData Serialize(const search::Schema& schema, - absl::Span> fields) const; + absl::Span fields) const; /* Serialize the whole type, the default implementation is to serialize all fields. @@ -84,7 +84,7 @@ struct JsonAccessor : public BaseAccessor { // The JsonAccessor works with structured types and not plain strings, so an overload is needed SearchDocData Serialize(const search::Schema& schema, - absl::Span> fields) const override; + absl::Span fields) const override; SearchDocData Serialize(const search::Schema& schema) const override; SearchDocData SerializeDocument(const search::Schema& schema) const override; diff --git a/src/server/search/doc_index.cc b/src/server/search/doc_index.cc index 9cc6328ded70..2c6d0633cd4b 100644 --- a/src/server/search/doc_index.cc +++ b/src/server/search/doc_index.cc @@ -230,7 +230,7 @@ SearchFieldsList ToSV(const search::Schema& schema, if (fields) { sv_fields.reserve(fields->size()); for (const auto& field : fields.value()) { - sv_fields.emplace_back(field); + sv_fields.push_back(field.View()); } } return sv_fields; @@ -302,7 +302,7 @@ vector ShardDocIndex::SearchForAggregator( absl::flat_hash_map sort_indicies_aliases; if (params.load_fields) { for (const auto& field : params.load_fields.value()) { - auto ident = field.GetIdentifier(base_->schema); + auto ident = field.GetIdentifier(base_->schema, false); if (sort_indicies_fields.contains(ident)) { sort_indicies_aliases[ident] = field.GetShortName(); } @@ -332,21 +332,20 @@ vector ShardDocIndex::SearchForAggregator( SearchFieldsList ShardDocIndex::GetFieldsToLoad( const std::optional& load_fields, const absl::flat_hash_set& skip_fields) const { - absl::flat_hash_map> unique_fields; + absl::flat_hash_map unique_fields; unique_fields.reserve(base_->schema.field_names.size()); for (const auto& [fname, fident] : base_->schema.field_names) { if (!skip_fields.contains(fident)) { - unique_fields[fident] = {std::string_view{fident}, NameType::kIdentifier, - std::string_view{fname}}; + unique_fields[fident] = {StringOrView::FromView(fident), true, StringOrView::FromView(fname)}; } } if (load_fields) { for (const auto& field : load_fields.value()) { - const auto& fident = field.GetIdentifier(base_->schema); + const auto& fident = field.GetIdentifier(base_->schema, false); if (!skip_fields.contains(fident)) { - unique_fields[fident] = field; + unique_fields[fident] = field.View(); } } } diff --git a/src/server/search/doc_index.h b/src/server/search/doc_index.h index 1e163ff33c1a..43b864a8eb74 100644 --- a/src/server/search/doc_index.h +++ b/src/server/search/doc_index.h @@ -52,13 +52,13 @@ struct SearchResult { std::optional error; }; -enum class NameType : uint8_t { kIdentifier, kShortName, kUndefined }; - -template class SearchField { +/* SearchField represents a field that can store combinations of identifiers and aliases in various + forms: [identifier and alias], [alias and new_alias], [new identifier and alias] (used for JSON + data) This class provides methods to retrieve the actual identifier and alias for a field, + handling different naming conventions and resolving names based on the schema. */ +class SearchField { private: - using SingleName = std::pair; - - static bool IsJsonPath(const T& name) { + static bool IsJsonPath(std::string_view name) { if (name.size() < 2) { return false; } @@ -68,89 +68,67 @@ template class SearchField { public: SearchField() = default; - SearchField(T name, NameType name_type) : name_(std::make_pair(std::move(name), name_type)) { - } - - SearchField(T name, NameType name_type, T new_alias) - : name_(std::make_pair(std::move(name), name_type)), new_alias_(std::move(new_alias)) { - } - - template >> - explicit SearchField(const SearchField& other) - : name_(std::make_pair(T{other.name_.first}, other.name_.second)) { - if (other.HasNewAlias()) { - new_alias_ = T{other.new_alias_.value()}; - } else { - new_alias_.reset(); - } - } - - template >> - SearchField& operator=(const SearchField& other) { - name_ = std::make_pair(T{other.name_.first}, other.name_.second); - if (other.HasNewAlias()) { - new_alias_ = T{other.new_alias_.value()}; - } else { - new_alias_.reset(); - } - return *this; + SearchField(StringOrView name, bool is_short_name) + : name_(std::move(name)), is_short_name_(is_short_name) { } - ~SearchField() = default; - - std::string_view GetIdentifier(const search::Schema& schema) const { - return GetIdentifier(schema, [&](const SingleName& single_name) { - return single_name.second == NameType::kIdentifier; - }); + SearchField(StringOrView name, bool is_short_name, StringOrView new_alias) + : name_(std::move(name)), is_short_name_(is_short_name), new_alias_(std::move(new_alias)) { } std::string_view GetIdentifier(const search::Schema& schema, bool is_json_field) const { - return GetIdentifier(schema, [&](const SingleName& single_name) { - return single_name.second == NameType::kIdentifier || - (is_json_field && IsJsonPath(single_name.first)); - }); + auto as_view = NameView(); + if (!is_short_name_ || (is_json_field && IsJsonPath(as_view))) { + return as_view; + } + return schema.LookupAlias(as_view); } std::string_view GetShortName() const { if (HasNewAlias()) { - return new_alias_.value(); + return AliasView(); } - return name_.first; + return NameView(); } std::string_view GetShortName(const search::Schema& schema) const { if (HasNewAlias()) { - return new_alias_.value(); + return AliasView(); } - - if (name_.second == NameType::kShortName) { - return name_.first; - } - return schema.LookupIdentifier(std::string_view{name_.first}); + return is_short_name_ ? NameView() : schema.LookupIdentifier(NameView()); } - private: - template - std::string_view GetIdentifier(const search::Schema& schema, Callback is_identifier) const { - if (is_identifier(name_)) { - return name_.first; + /* Returns a new SearchField instance with name and alias stored as views to the values in this + * SearchField */ + SearchField View() const { + if (HasNewAlias()) { + return SearchField{StringOrView::FromView(NameView()), is_short_name_, + StringOrView::FromView(AliasView())}; } - return schema.LookupAlias(std::string_view{name_.first}); + return SearchField{StringOrView::FromView(NameView()), is_short_name_}; } + private: bool HasNewAlias() const { return new_alias_.has_value(); } - template friend class SearchField; + std::string_view NameView() const { + return name_.view(); + } + + std::string_view AliasView() const { + return new_alias_.value().view(); + } private: - SingleName name_; - std::optional new_alias_; + StringOrView name_; + bool is_short_name_; + std::optional new_alias_; }; -using SearchFieldsList = std::vector>; -using OwnedSearchFieldsList = std::vector>; +using SearchFieldsList = std::vector; +using OwnedSearchFieldsList = std::vector; struct SearchParams { // Parameters for "LIMIT offset total": select total amount documents with a specific offset from diff --git a/src/server/search/search_family.cc b/src/server/search/search_family.cc index c7d17d10e303..6c7296133618 100644 --- a/src/server/search/search_family.cc +++ b/src/server/search/search_family.cc @@ -221,11 +221,12 @@ void ParseLoadFields(CmdArgParser* parser, std::optional* str.remove_prefix(1); // remove leading @ } + StringOrView name = StringOrView::FromString(std::string{str}); if (parser->Check("AS")) { - load_fields->value().emplace_back(std::string{str}, NameType::kShortName, - std::string{parser->Next()}); + load_fields->value().emplace_back(name, true, + StringOrView::FromString(std::string{parser->Next()})); } else { - load_fields->value().emplace_back(std::string{str}, NameType::kShortName); + load_fields->value().emplace_back(name, true); } } } @@ -266,13 +267,13 @@ optional ParseSearchParamsOrReply(CmdArgParser* parser, SinkReplyB size_t num_fields = parser->Next(); params.return_fields.emplace(); while (params.return_fields->size() < num_fields) { - std::string_view str = parser->Next(); + StringOrView name = StringOrView::FromString(std::string{parser->Next()}); if (parser->Check("AS")) { - params.return_fields->emplace_back(std::string{str}, NameType::kShortName, - std::string{parser->Next()}); + params.return_fields->emplace_back(std::move(name), true, + StringOrView::FromString(std::string{parser->Next()})); } else { - params.return_fields->emplace_back(std::string{str}, NameType::kShortName); + params.return_fields->emplace_back(std::move(name), true); } } } else if (parser->Check("NOCONTENT")) { // NOCONTENT From 26f0c6df6c1236800e2a5388c0d9a0e071b81835 Mon Sep 17 00:00:00 2001 From: Stepan Bagritsevich Date: Mon, 18 Nov 2024 15:51:59 +0100 Subject: [PATCH 3/6] refactor(search_family): Address comments 2 Signed-off-by: Stepan Bagritsevich --- src/core/search/search.cc | 22 +----- src/core/search/search.h | 13 +--- src/server/search/doc_index.cc | 109 ++++++++++++++++------------- src/server/search/doc_index.h | 13 +--- src/server/search/search_family.cc | 5 +- 5 files changed, 74 insertions(+), 88 deletions(-) diff --git a/src/core/search/search.cc b/src/core/search/search.cc index 123f8fa2c695..a2bd9f56e573 100644 --- a/src/core/search/search.cc +++ b/src/core/search/search.cc @@ -652,25 +652,9 @@ const Schema& FieldIndices::GetSchema() const { return schema_; } -vector> FieldIndices::ExtractStoredValues( - DocId doc, const absl::flat_hash_map& aliases) const { - vector> out; - for (const auto& [ident, index] : sort_indices_) { - const auto& it = aliases.find(ident); - const auto& name = it == aliases.end() ? schema_.LookupIdentifier(ident) : it->second; - - out.emplace_back(name, index->Lookup(doc)); - } - return out; -} - -absl::flat_hash_set FieldIndices::GetSortIndiciesFields() const { - absl::flat_hash_set fields_idents; - fields_idents.reserve(sort_indices_.size()); - for (const auto& [ident, _] : sort_indices_) { - fields_idents.insert(ident); - } - return fields_idents; +SortableValue FieldIndices::GetSortIndexValue(DocId doc, std::string_view field_identifier) const { + auto it = sort_indices_.find(field_identifier); + return it->second->Lookup(doc); } SearchAlgorithm::SearchAlgorithm() = default; diff --git a/src/core/search/search.h b/src/core/search/search.h index 08ef30c9b21b..2c2b6915a6f5 100644 --- a/src/core/search/search.h +++ b/src/core/search/search.h @@ -91,14 +91,7 @@ class FieldIndices { const std::vector& GetAllDocs() const; const Schema& GetSchema() const; - // Extract values stored in sort indices - // aliases are specified in addition to the aliases in search::Schema - std::vector> ExtractStoredValues( - DocId doc, - const absl::flat_hash_map& - aliases) const; - - absl::flat_hash_set GetSortIndiciesFields() const; + SortableValue GetSortIndexValue(DocId doc, std::string_view field_identifier) const; private: void CreateIndices(PMR_NS::memory_resource* mr); @@ -107,8 +100,8 @@ class FieldIndices { const Schema& schema_; const IndicesOptions& options_; std::vector all_ids_; - absl::flat_hash_map> indices_; - absl::flat_hash_map> sort_indices_; + absl::flat_hash_map> indices_; + absl::flat_hash_map> sort_indices_; }; struct AlgorithmProfile { diff --git a/src/server/search/doc_index.cc b/src/server/search/doc_index.cc index 2c6d0633cd4b..99a6c909f262 100644 --- a/src/server/search/doc_index.cc +++ b/src/server/search/doc_index.cc @@ -224,8 +224,7 @@ bool ShardDocIndex::Matches(string_view key, unsigned obj_code) const { return base_->Matches(key, obj_code); } -SearchFieldsList ToSV(const search::Schema& schema, - const std::optional& fields) { +SearchFieldsList ToSV(const search::Schema& schema, const std::optional& fields) { SearchFieldsList sv_fields; if (fields) { sv_fields.reserve(fields->size()); @@ -286,6 +285,57 @@ SearchResult ShardDocIndex::Search(const OpArgs& op_args, const SearchParams& pa std::move(search_results.profile)}; } +using SortIndiciesFieldsList = + std::vector>; + +std::pair PreprocessAggregateFields( + const search::Schema& schema, const AggregateParams& params, + const std::optional& load_fields) { + auto is_sortable = [&schema](std::string_view fident) { + auto it = schema.fields.find(fident); + return it != schema.fields.end() && (it->second.flags & search::SchemaField::SORTABLE); + }; + + absl::flat_hash_map fields_by_identifier; + absl::flat_hash_map sort_indicies_aliases; + fields_by_identifier.reserve(schema.field_names.size()); + sort_indicies_aliases.reserve(schema.field_names.size()); + + for (const auto& [fname, fident] : schema.field_names) { + if (!is_sortable(fident)) { + fields_by_identifier[fident] = {StringOrView::FromView(fident), true, + StringOrView::FromView(fname)}; + } else { + sort_indicies_aliases[fident] = fname; + } + } + + if (load_fields) { + for (const auto& field : load_fields.value()) { + const auto& fident = field.GetIdentifier(schema, false); + if (!is_sortable(fident)) { + fields_by_identifier[fident] = field.View(); + } else { + sort_indicies_aliases[fident] = field.GetShortName(); + } + } + } + + SearchFieldsList fields; + fields.reserve(fields_by_identifier.size()); + for (auto& [_, field] : fields_by_identifier) { + fields.emplace_back(std::move(field)); + } + + SortIndiciesFieldsList sort_fields; + sort_fields.reserve(sort_indicies_aliases.size()); + for (auto& [fident, fname] : sort_indicies_aliases) { + sort_fields.emplace_back(fident, fname); + } + + return {fields, sort_fields}; +} + vector ShardDocIndex::SearchForAggregator( const OpArgs& op_args, const AggregateParams& params, search::SearchAlgorithm* search_algo) const { @@ -295,19 +345,8 @@ vector ShardDocIndex::SearchForAggregator( if (!search_results.error.empty()) return {}; - auto sort_indicies_fields = indices_->GetSortIndiciesFields(); - SearchFieldsList fields_to_load = GetFieldsToLoad(params.load_fields, sort_indicies_fields); - - // aliases for ExtractStoredValues - absl::flat_hash_map sort_indicies_aliases; - if (params.load_fields) { - for (const auto& field : params.load_fields.value()) { - auto ident = field.GetIdentifier(base_->schema, false); - if (sort_indicies_fields.contains(ident)) { - sort_indicies_aliases[ident] = field.GetShortName(); - } - } - } + auto [fields_to_load, sort_indicies] = + PreprocessAggregateFields(base_->schema, params, params.load_fields); vector> out; for (DocId doc : search_results.ids) { @@ -318,47 +357,23 @@ vector ShardDocIndex::SearchForAggregator( continue; auto accessor = GetAccessor(op_args.db_cntx, (*it)->second); - auto extracted = indices_->ExtractStoredValues(doc, sort_indicies_aliases); + + SearchDocData extracted_sort_indicies; + extracted_sort_indicies.reserve(sort_indicies.size()); + for (const auto& [fident, fname] : sort_indicies) { + extracted_sort_indicies[fname] = indices_->GetSortIndexValue(doc, fident); + } SearchDocData loaded = accessor->Serialize(base_->schema, fields_to_load); - out.emplace_back(make_move_iterator(extracted.begin()), make_move_iterator(extracted.end())); + out.emplace_back(make_move_iterator(extracted_sort_indicies.begin()), + make_move_iterator(extracted_sort_indicies.end())); out.back().insert(make_move_iterator(loaded.begin()), make_move_iterator(loaded.end())); } return out; } -SearchFieldsList ShardDocIndex::GetFieldsToLoad( - const std::optional& load_fields, - const absl::flat_hash_set& skip_fields) const { - absl::flat_hash_map unique_fields; - unique_fields.reserve(base_->schema.field_names.size()); - - for (const auto& [fname, fident] : base_->schema.field_names) { - if (!skip_fields.contains(fident)) { - unique_fields[fident] = {StringOrView::FromView(fident), true, StringOrView::FromView(fname)}; - } - } - - if (load_fields) { - for (const auto& field : load_fields.value()) { - const auto& fident = field.GetIdentifier(base_->schema, false); - if (!skip_fields.contains(fident)) { - unique_fields[fident] = field.View(); - } - } - } - - SearchFieldsList fields; - fields.reserve(unique_fields.size()); - for (auto& [_, field] : unique_fields) { - fields.emplace_back(std::move(field)); - } - - return fields; -} - DocIndexInfo ShardDocIndex::GetInfo() const { return {*base_, key_index_.Size()}; } diff --git a/src/server/search/doc_index.h b/src/server/search/doc_index.h index 43b864a8eb74..1bceb75cec21 100644 --- a/src/server/search/doc_index.h +++ b/src/server/search/doc_index.h @@ -128,7 +128,6 @@ class SearchField { }; using SearchFieldsList = std::vector; -using OwnedSearchFieldsList = std::vector; struct SearchParams { // Parameters for "LIMIT offset total": select total amount documents with a specific offset from @@ -141,14 +140,14 @@ struct SearchParams { 2. If set but empty -> no fields should be returned 3. If set and not empty -> return only these fields */ - std::optional return_fields; + std::optional return_fields; /* Fields that should be also loaded from the document. Only one of load_fields and return_fields should be set. */ - std::optional load_fields; + std::optional load_fields; std::optional sort_option; search::QueryParams query_params; @@ -168,7 +167,7 @@ struct AggregateParams { std::string_view index, query; search::QueryParams params; - std::optional load_fields; + std::optional load_fields; std::vector steps; }; @@ -242,12 +241,6 @@ class ShardDocIndex { io::Result GetTagVals(std::string_view field) const; private: - /* Returns the fields that are the union of the already indexed fields and load_fields, excluding - skip_fields. - Load_fields should not be destroyed while the result of this function is being used */ - SearchFieldsList GetFieldsToLoad(const std::optional& load_fields, - const absl::flat_hash_set& skip_fields) const; - // Clears internal data. Traverses all matching documents and assigns ids. void Rebuild(const OpArgs& op_args, PMR_NS::memory_resource* mr); diff --git a/src/server/search/search_family.cc b/src/server/search/search_family.cc index 6c7296133618..6c1b4f36a158 100644 --- a/src/server/search/search_family.cc +++ b/src/server/search/search_family.cc @@ -207,7 +207,7 @@ std::string_view ParseFieldWithAtSign(CmdArgParser* parser) { return field; } -void ParseLoadFields(CmdArgParser* parser, std::optional* load_fields) { +void ParseLoadFields(CmdArgParser* parser, std::optional* load_fields) { // TODO: Change to num_strings. In Redis strings number is expected. For example: LOAD 3 $.a AS a size_t num_fields = parser->Next(); if (!load_fields->has_value()) { @@ -263,7 +263,8 @@ optional ParseSearchParamsOrReply(CmdArgParser* parser, SinkReplyB } // RETURN {num} [{ident} AS {name}...] - /* TODO: Change to num_strings. In Redis strings number is expected. For example: RETURN 3 $.a AS a */ + /* TODO: Change to num_strings. In Redis strings number is expected. For example: RETURN 3 $.a + * AS a */ size_t num_fields = parser->Next(); params.return_fields.emplace(); while (params.return_fields->size() < num_fields) { From e9c17b6f64ad9ddd0e7eee6d45e5ec93cba6460d Mon Sep 17 00:00:00 2001 From: Stepan Bagritsevich Date: Tue, 19 Nov 2024 12:14:10 +0100 Subject: [PATCH 4/6] refactor(search_family): address comments 3 Signed-off-by: Stepan Bagritsevich --- src/core/string_or_view.h | 4 ++++ src/server/search/doc_index.h | 6 +++--- 2 files changed, 7 insertions(+), 3 deletions(-) diff --git a/src/core/string_or_view.h b/src/core/string_or_view.h index a12406344ac7..ab2b16361ba7 100644 --- a/src/core/string_or_view.h +++ b/src/core/string_or_view.h @@ -65,6 +65,10 @@ class StringOrView { val_ = std::string{std::get(val_)}; } + bool empty() const { + return visit([](const auto& s) { return s.empty(); }, val_); + } + private: std::variant val_; }; diff --git a/src/server/search/doc_index.h b/src/server/search/doc_index.h index 1bceb75cec21..6b5a2da6cf7d 100644 --- a/src/server/search/doc_index.h +++ b/src/server/search/doc_index.h @@ -110,7 +110,7 @@ class SearchField { private: bool HasNewAlias() const { - return new_alias_.has_value(); + return !new_alias_.empty(); } std::string_view NameView() const { @@ -118,13 +118,13 @@ class SearchField { } std::string_view AliasView() const { - return new_alias_.value().view(); + return new_alias_.view(); } private: StringOrView name_; bool is_short_name_; - std::optional new_alias_; + StringOrView new_alias_; }; using SearchFieldsList = std::vector; From c74d4a36085725d130ba189adadc484156244eeb Mon Sep 17 00:00:00 2001 From: Stepan Bagritsevich Date: Wed, 20 Nov 2024 08:29:19 +0400 Subject: [PATCH 5/6] refactor(search_family): address comments 4 Signed-off-by: Stepan Bagritsevich --- src/server/search/doc_index.cc | 2 +- src/server/search/search_family.cc | 9 +++++---- 2 files changed, 6 insertions(+), 5 deletions(-) diff --git a/src/server/search/doc_index.cc b/src/server/search/doc_index.cc index 99a6c909f262..e93095eaa7aa 100644 --- a/src/server/search/doc_index.cc +++ b/src/server/search/doc_index.cc @@ -333,7 +333,7 @@ std::pair PreprocessAggregateFields( sort_fields.emplace_back(fident, fname); } - return {fields, sort_fields}; + return {std::move(fields), std::move(sort_fields)}; } vector ShardDocIndex::SearchForAggregator( diff --git a/src/server/search/search_family.cc b/src/server/search/search_family.cc index 6c1b4f36a158..65c9665256aa 100644 --- a/src/server/search/search_family.cc +++ b/src/server/search/search_family.cc @@ -224,7 +224,7 @@ void ParseLoadFields(CmdArgParser* parser, std::optional* load StringOrView name = StringOrView::FromString(std::string{str}); if (parser->Check("AS")) { load_fields->value().emplace_back(name, true, - StringOrView::FromString(std::string{parser->Next()})); + StringOrView::FromString(parser->Next())); } else { load_fields->value().emplace_back(name, true); } @@ -268,11 +268,11 @@ optional ParseSearchParamsOrReply(CmdArgParser* parser, SinkReplyB size_t num_fields = parser->Next(); params.return_fields.emplace(); while (params.return_fields->size() < num_fields) { - StringOrView name = StringOrView::FromString(std::string{parser->Next()}); + StringOrView name = StringOrView::FromString(parser->Next()); if (parser->Check("AS")) { params.return_fields->emplace_back(std::move(name), true, - StringOrView::FromString(std::string{parser->Next()})); + StringOrView::FromString(parser->Next())); } else { params.return_fields->emplace_back(std::move(name), true); } @@ -283,7 +283,8 @@ optional ParseSearchParamsOrReply(CmdArgParser* parser, SinkReplyB } else if (parser->Check("PARAMS")) { // [PARAMS num(ignored) name(ignored) knn_vector] params.query_params = ParseQueryParams(parser); } else if (parser->Check("SORTBY")) { - params.sort_option = search::SortOption{string{parser->Next()}, bool(parser->Check("DESC"))}; + params.sort_option = + search::SortOption{parser->Next(), bool(parser->Check("DESC"))}; } else { // Unsupported parameters are ignored for now parser->Skip(1); From 75b4df80c18815433c92ee91bf68231276400ae7 Mon Sep 17 00:00:00 2001 From: Stepan Bagritsevich Date: Mon, 25 Nov 2024 13:08:52 +0400 Subject: [PATCH 6/6] refactor(search_family): address comments 5 Signed-off-by: Stepan Bagritsevich --- src/core/search/search.cc | 1 + src/server/search/search_family.cc | 10 +++------- 2 files changed, 4 insertions(+), 7 deletions(-) diff --git a/src/core/search/search.cc b/src/core/search/search.cc index a2bd9f56e573..c62be8096334 100644 --- a/src/core/search/search.cc +++ b/src/core/search/search.cc @@ -654,6 +654,7 @@ const Schema& FieldIndices::GetSchema() const { SortableValue FieldIndices::GetSortIndexValue(DocId doc, std::string_view field_identifier) const { auto it = sort_indices_.find(field_identifier); + DCHECK(it != sort_indices_.end()); return it->second->Lookup(doc); } diff --git a/src/server/search/search_family.cc b/src/server/search/search_family.cc index 65c9665256aa..c7c12176a45c 100644 --- a/src/server/search/search_family.cc +++ b/src/server/search/search_family.cc @@ -183,13 +183,9 @@ optional ParseSchemaOrReply(DocIndex::DataType type, CmdArgParse #pragma GCC diagnostic pop #endif -bool StartsWithAtSign(std::string_view field) { - return !field.empty() && field.front() == '@'; -} - std::string_view ParseField(CmdArgParser* parser) { std::string_view field = parser->Next(); - if (StartsWithAtSign(field)) { + if (absl::StartsWith(field, "@"sv)) { field.remove_prefix(1); // remove leading @ if exists } return field; @@ -197,7 +193,7 @@ std::string_view ParseField(CmdArgParser* parser) { std::string_view ParseFieldWithAtSign(CmdArgParser* parser) { std::string_view field = parser->Next(); - if (StartsWithAtSign(field)) { + if (absl::StartsWith(field, "@"sv)) { field.remove_prefix(1); // remove leading @ } else { // Temporary warning until we can throw an error @@ -217,7 +213,7 @@ void ParseLoadFields(CmdArgParser* parser, std::optional* load while (num_fields--) { string_view str = parser->Next(); - if (StartsWithAtSign(str)) { + if (absl::StartsWith(str, "@"sv)) { str.remove_prefix(1); // remove leading @ }