From a62cf79f2fa779bb68ecc0c3613001bfb6bf99b4 Mon Sep 17 00:00:00 2001 From: Stepan Bagritsevich Date: Mon, 18 Nov 2024 15:51:59 +0100 Subject: [PATCH] refactor(search_family): Address comments 2 Signed-off-by: Stepan Bagritsevich --- src/core/search/search.cc | 22 +----- src/core/search/search.h | 13 +--- src/server/search/doc_index.cc | 109 ++++++++++++++++------------- src/server/search/doc_index.h | 13 +--- src/server/search/search_family.cc | 5 +- 5 files changed, 74 insertions(+), 88 deletions(-) diff --git a/src/core/search/search.cc b/src/core/search/search.cc index 123f8fa2c695..a2bd9f56e573 100644 --- a/src/core/search/search.cc +++ b/src/core/search/search.cc @@ -652,25 +652,9 @@ const Schema& FieldIndices::GetSchema() const { return schema_; } -vector> FieldIndices::ExtractStoredValues( - DocId doc, const absl::flat_hash_map& aliases) const { - vector> out; - for (const auto& [ident, index] : sort_indices_) { - const auto& it = aliases.find(ident); - const auto& name = it == aliases.end() ? schema_.LookupIdentifier(ident) : it->second; - - out.emplace_back(name, index->Lookup(doc)); - } - return out; -} - -absl::flat_hash_set FieldIndices::GetSortIndiciesFields() const { - absl::flat_hash_set fields_idents; - fields_idents.reserve(sort_indices_.size()); - for (const auto& [ident, _] : sort_indices_) { - fields_idents.insert(ident); - } - return fields_idents; +SortableValue FieldIndices::GetSortIndexValue(DocId doc, std::string_view field_identifier) const { + auto it = sort_indices_.find(field_identifier); + return it->second->Lookup(doc); } SearchAlgorithm::SearchAlgorithm() = default; diff --git a/src/core/search/search.h b/src/core/search/search.h index 08ef30c9b21b..2c2b6915a6f5 100644 --- a/src/core/search/search.h +++ b/src/core/search/search.h @@ -91,14 +91,7 @@ class FieldIndices { const std::vector& GetAllDocs() const; const Schema& GetSchema() const; - // Extract values stored in sort indices - // aliases are specified in addition to the aliases in search::Schema - std::vector> ExtractStoredValues( - DocId doc, - const absl::flat_hash_map& - aliases) const; - - absl::flat_hash_set GetSortIndiciesFields() const; + SortableValue GetSortIndexValue(DocId doc, std::string_view field_identifier) const; private: void CreateIndices(PMR_NS::memory_resource* mr); @@ -107,8 +100,8 @@ class FieldIndices { const Schema& schema_; const IndicesOptions& options_; std::vector all_ids_; - absl::flat_hash_map> indices_; - absl::flat_hash_map> sort_indices_; + absl::flat_hash_map> indices_; + absl::flat_hash_map> sort_indices_; }; struct AlgorithmProfile { diff --git a/src/server/search/doc_index.cc b/src/server/search/doc_index.cc index 2c6d0633cd4b..99a6c909f262 100644 --- a/src/server/search/doc_index.cc +++ b/src/server/search/doc_index.cc @@ -224,8 +224,7 @@ bool ShardDocIndex::Matches(string_view key, unsigned obj_code) const { return base_->Matches(key, obj_code); } -SearchFieldsList ToSV(const search::Schema& schema, - const std::optional& fields) { +SearchFieldsList ToSV(const search::Schema& schema, const std::optional& fields) { SearchFieldsList sv_fields; if (fields) { sv_fields.reserve(fields->size()); @@ -286,6 +285,57 @@ SearchResult ShardDocIndex::Search(const OpArgs& op_args, const SearchParams& pa std::move(search_results.profile)}; } +using SortIndiciesFieldsList = + std::vector>; + +std::pair PreprocessAggregateFields( + const search::Schema& schema, const AggregateParams& params, + const std::optional& load_fields) { + auto is_sortable = [&schema](std::string_view fident) { + auto it = schema.fields.find(fident); + return it != schema.fields.end() && (it->second.flags & search::SchemaField::SORTABLE); + }; + + absl::flat_hash_map fields_by_identifier; + absl::flat_hash_map sort_indicies_aliases; + fields_by_identifier.reserve(schema.field_names.size()); + sort_indicies_aliases.reserve(schema.field_names.size()); + + for (const auto& [fname, fident] : schema.field_names) { + if (!is_sortable(fident)) { + fields_by_identifier[fident] = {StringOrView::FromView(fident), true, + StringOrView::FromView(fname)}; + } else { + sort_indicies_aliases[fident] = fname; + } + } + + if (load_fields) { + for (const auto& field : load_fields.value()) { + const auto& fident = field.GetIdentifier(schema, false); + if (!is_sortable(fident)) { + fields_by_identifier[fident] = field.View(); + } else { + sort_indicies_aliases[fident] = field.GetShortName(); + } + } + } + + SearchFieldsList fields; + fields.reserve(fields_by_identifier.size()); + for (auto& [_, field] : fields_by_identifier) { + fields.emplace_back(std::move(field)); + } + + SortIndiciesFieldsList sort_fields; + sort_fields.reserve(sort_indicies_aliases.size()); + for (auto& [fident, fname] : sort_indicies_aliases) { + sort_fields.emplace_back(fident, fname); + } + + return {fields, sort_fields}; +} + vector ShardDocIndex::SearchForAggregator( const OpArgs& op_args, const AggregateParams& params, search::SearchAlgorithm* search_algo) const { @@ -295,19 +345,8 @@ vector ShardDocIndex::SearchForAggregator( if (!search_results.error.empty()) return {}; - auto sort_indicies_fields = indices_->GetSortIndiciesFields(); - SearchFieldsList fields_to_load = GetFieldsToLoad(params.load_fields, sort_indicies_fields); - - // aliases for ExtractStoredValues - absl::flat_hash_map sort_indicies_aliases; - if (params.load_fields) { - for (const auto& field : params.load_fields.value()) { - auto ident = field.GetIdentifier(base_->schema, false); - if (sort_indicies_fields.contains(ident)) { - sort_indicies_aliases[ident] = field.GetShortName(); - } - } - } + auto [fields_to_load, sort_indicies] = + PreprocessAggregateFields(base_->schema, params, params.load_fields); vector> out; for (DocId doc : search_results.ids) { @@ -318,47 +357,23 @@ vector ShardDocIndex::SearchForAggregator( continue; auto accessor = GetAccessor(op_args.db_cntx, (*it)->second); - auto extracted = indices_->ExtractStoredValues(doc, sort_indicies_aliases); + + SearchDocData extracted_sort_indicies; + extracted_sort_indicies.reserve(sort_indicies.size()); + for (const auto& [fident, fname] : sort_indicies) { + extracted_sort_indicies[fname] = indices_->GetSortIndexValue(doc, fident); + } SearchDocData loaded = accessor->Serialize(base_->schema, fields_to_load); - out.emplace_back(make_move_iterator(extracted.begin()), make_move_iterator(extracted.end())); + out.emplace_back(make_move_iterator(extracted_sort_indicies.begin()), + make_move_iterator(extracted_sort_indicies.end())); out.back().insert(make_move_iterator(loaded.begin()), make_move_iterator(loaded.end())); } return out; } -SearchFieldsList ShardDocIndex::GetFieldsToLoad( - const std::optional& load_fields, - const absl::flat_hash_set& skip_fields) const { - absl::flat_hash_map unique_fields; - unique_fields.reserve(base_->schema.field_names.size()); - - for (const auto& [fname, fident] : base_->schema.field_names) { - if (!skip_fields.contains(fident)) { - unique_fields[fident] = {StringOrView::FromView(fident), true, StringOrView::FromView(fname)}; - } - } - - if (load_fields) { - for (const auto& field : load_fields.value()) { - const auto& fident = field.GetIdentifier(base_->schema, false); - if (!skip_fields.contains(fident)) { - unique_fields[fident] = field.View(); - } - } - } - - SearchFieldsList fields; - fields.reserve(unique_fields.size()); - for (auto& [_, field] : unique_fields) { - fields.emplace_back(std::move(field)); - } - - return fields; -} - DocIndexInfo ShardDocIndex::GetInfo() const { return {*base_, key_index_.Size()}; } diff --git a/src/server/search/doc_index.h b/src/server/search/doc_index.h index 43b864a8eb74..1bceb75cec21 100644 --- a/src/server/search/doc_index.h +++ b/src/server/search/doc_index.h @@ -128,7 +128,6 @@ class SearchField { }; using SearchFieldsList = std::vector; -using OwnedSearchFieldsList = std::vector; struct SearchParams { // Parameters for "LIMIT offset total": select total amount documents with a specific offset from @@ -141,14 +140,14 @@ struct SearchParams { 2. If set but empty -> no fields should be returned 3. If set and not empty -> return only these fields */ - std::optional return_fields; + std::optional return_fields; /* Fields that should be also loaded from the document. Only one of load_fields and return_fields should be set. */ - std::optional load_fields; + std::optional load_fields; std::optional sort_option; search::QueryParams query_params; @@ -168,7 +167,7 @@ struct AggregateParams { std::string_view index, query; search::QueryParams params; - std::optional load_fields; + std::optional load_fields; std::vector steps; }; @@ -242,12 +241,6 @@ class ShardDocIndex { io::Result GetTagVals(std::string_view field) const; private: - /* Returns the fields that are the union of the already indexed fields and load_fields, excluding - skip_fields. - Load_fields should not be destroyed while the result of this function is being used */ - SearchFieldsList GetFieldsToLoad(const std::optional& load_fields, - const absl::flat_hash_set& skip_fields) const; - // Clears internal data. Traverses all matching documents and assigns ids. void Rebuild(const OpArgs& op_args, PMR_NS::memory_resource* mr); diff --git a/src/server/search/search_family.cc b/src/server/search/search_family.cc index 6c7296133618..6c1b4f36a158 100644 --- a/src/server/search/search_family.cc +++ b/src/server/search/search_family.cc @@ -207,7 +207,7 @@ std::string_view ParseFieldWithAtSign(CmdArgParser* parser) { return field; } -void ParseLoadFields(CmdArgParser* parser, std::optional* load_fields) { +void ParseLoadFields(CmdArgParser* parser, std::optional* load_fields) { // TODO: Change to num_strings. In Redis strings number is expected. For example: LOAD 3 $.a AS a size_t num_fields = parser->Next(); if (!load_fields->has_value()) { @@ -263,7 +263,8 @@ optional ParseSearchParamsOrReply(CmdArgParser* parser, SinkReplyB } // RETURN {num} [{ident} AS {name}...] - /* TODO: Change to num_strings. In Redis strings number is expected. For example: RETURN 3 $.a AS a */ + /* TODO: Change to num_strings. In Redis strings number is expected. For example: RETURN 3 $.a + * AS a */ size_t num_fields = parser->Next(); params.return_fields.emplace(); while (params.return_fields->size() < num_fields) {