From e301066caf0cfd4c5e4f4c663d6bb60db185d39d Mon Sep 17 00:00:00 2001 From: kkewwei Date: Sat, 28 Dec 2024 07:29:45 +0800 Subject: [PATCH] Retrieve value from DocValues in a flat_object filed (#16802) optimize innerhit phase --- CHANGELOG.md | 1 + .../92_flat_object_support_doc_values.yml | 52 ++++++++++++++- .../search/fields/SearchFieldsIT.java | 22 +++---- .../index/mapper/DocValueFetcher.java | 6 +- .../index/mapper/DocumentMapper.java | 47 +++++++++----- .../index/mapper/FlatObjectFieldMapper.java | 63 ++++++++++++++++++- .../index/query/NestedQueryBuilder.java | 6 +- .../opensearch/search/fetch/FetchPhase.java | 12 ++-- .../mapper/FlatObjectFieldMapperTests.java | 22 +++++++ 9 files changed, 189 insertions(+), 42 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 44236c4bc25c0..a503728a44fda 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -24,6 +24,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), - Added a precaution to handle extreme date values during sorting to prevent `arithmetic_exception: long overflow` ([#16812](https://github.com/opensearch-project/OpenSearch/pull/16812)). - Add search replica stats to segment replication stats API ([#16678](https://github.com/opensearch-project/OpenSearch/pull/16678)) - Introduce a setting to disable download of full cluster state from remote on term mismatch([#16798](https://github.com/opensearch-project/OpenSearch/pull/16798/)) +- Added ability to retrieve value from DocValues in a flat_object filed([#16802](https://github.com/opensearch-project/OpenSearch/pull/16802)) ### Dependencies - Bump `com.google.cloud:google-cloud-core-http` from 2.23.0 to 2.47.0 ([#16504](https://github.com/opensearch-project/OpenSearch/pull/16504)) diff --git a/rest-api-spec/src/main/resources/rest-api-spec/test/index/92_flat_object_support_doc_values.yml b/rest-api-spec/src/main/resources/rest-api-spec/test/index/92_flat_object_support_doc_values.yml index 9ec39660a4928..c840276ee1157 100644 --- a/rest-api-spec/src/main/resources/rest-api-spec/test/index/92_flat_object_support_doc_values.yml +++ b/rest-api-spec/src/main/resources/rest-api-spec/test/index/92_flat_object_support_doc_values.yml @@ -1,8 +1,9 @@ --- # The test setup includes: -# - Create flat_object mapping for flat_object_doc_values_test index -# - Index 9 example documents -# - Search tests about doc_values and index +# - 1.Create flat_object mapping for flat_object_doc_values_test index +# - 2.Index 9 example documents +# - 3.Search tests about doc_values and index +# - 4.Fetch doc_value from flat_object field setup: - skip: @@ -786,3 +787,48 @@ teardown: - length: { hits.hits: 1 } - match: { hits.hits.0._source.order: "order8" } + + # Stored Fields with exact dot path. + - do: + search: + body: { + _source: false, + query: { + bool: { + must: [ + { + term: { + order: "order0" + } + } + ] + } + }, + stored_fields: "_none_", + docvalue_fields: [ "issue.labels.name","order" ] + } + + - length: { hits.hits: 1 } + - match: { hits.hits.0.fields: { "order" : [ "order0" ], "issue.labels.name": [ "abc0" ] } } + + - do: + search: + body: { + _source: false, + query: { + bool: { + must: [ + { + term: { + order: "order0" + } + } + ] + } + }, + stored_fields: "_none_", + docvalue_fields: [ "issue.labels.name" ] + } + + - length: { hits.hits: 1 } + - match: { hits.hits.0.fields: { "issue.labels.name": [ "abc0" ] } } diff --git a/server/src/internalClusterTest/java/org/opensearch/search/fields/SearchFieldsIT.java b/server/src/internalClusterTest/java/org/opensearch/search/fields/SearchFieldsIT.java index 2ce96092203e8..60a6e59014e11 100644 --- a/server/src/internalClusterTest/java/org/opensearch/search/fields/SearchFieldsIT.java +++ b/server/src/internalClusterTest/java/org/opensearch/search/fields/SearchFieldsIT.java @@ -1023,7 +1023,7 @@ public void testDocValueFields() throws Exception { .startObject("ip_field") .field("type", "ip") .endObject() - .startObject("flat_object_field") + .startObject("flat_object_field1") .field("type", "flat_object") .endObject() .endObject() @@ -1050,9 +1050,11 @@ public void testDocValueFields() throws Exception { .field("boolean_field", true) .field("binary_field", new byte[] { 42, 100 }) .field("ip_field", "::1") - .field("flat_object_field") + .field("flat_object_field1") .startObject() + .field("fooa", "bara") .field("foo", "bar") + .field("foob", "barb") .endObject() .endObject() ) @@ -1075,7 +1077,7 @@ public void testDocValueFields() throws Exception { .addDocValueField("boolean_field") .addDocValueField("binary_field") .addDocValueField("ip_field") - .addDocValueField("flat_object_field"); + .addDocValueField("flat_object_field1.foo"); SearchResponse searchResponse = builder.get(); assertThat(searchResponse.getHits().getTotalHits().value, equalTo(1L)); @@ -1097,7 +1099,7 @@ public void testDocValueFields() throws Exception { "keyword_field", "binary_field", "ip_field", - "flat_object_field" + "flat_object_field1.foo" ) ) ); @@ -1116,7 +1118,7 @@ public void testDocValueFields() throws Exception { assertThat(searchResponse.getHits().getAt(0).getFields().get("keyword_field").getValue(), equalTo("foo")); assertThat(searchResponse.getHits().getAt(0).getFields().get("binary_field").getValue(), equalTo("KmQ")); assertThat(searchResponse.getHits().getAt(0).getFields().get("ip_field").getValue(), equalTo("::1")); - assertThat(searchResponse.getHits().getAt(0).getFields().get("flat_object_field").getValue(), equalTo("flat_object_field.foo")); + assertThat(searchResponse.getHits().getAt(0).getFields().get("flat_object_field1.foo").getValue(), equalTo("bar")); builder = client().prepareSearch().setQuery(matchAllQuery()).addDocValueField("*field"); searchResponse = builder.get(); @@ -1139,8 +1141,7 @@ public void testDocValueFields() throws Exception { "text_field", "keyword_field", "binary_field", - "ip_field", - "flat_object_field" + "ip_field" ) ) ); @@ -1160,7 +1161,6 @@ public void testDocValueFields() throws Exception { assertThat(searchResponse.getHits().getAt(0).getFields().get("keyword_field").getValue(), equalTo("foo")); assertThat(searchResponse.getHits().getAt(0).getFields().get("binary_field").getValue(), equalTo("KmQ")); assertThat(searchResponse.getHits().getAt(0).getFields().get("ip_field").getValue(), equalTo("::1")); - assertThat(searchResponse.getHits().getAt(0).getFields().get("flat_object_field").getValue(), equalTo("flat_object_field.foo")); builder = client().prepareSearch() .setQuery(matchAllQuery()) @@ -1176,7 +1176,7 @@ public void testDocValueFields() throws Exception { .addDocValueField("boolean_field", "use_field_mapping") .addDocValueField("binary_field", "use_field_mapping") .addDocValueField("ip_field", "use_field_mapping") - .addDocValueField("flat_object_field", "use_field_mapping"); + .addDocValueField("flat_object_field1.foo", null); ; searchResponse = builder.get(); @@ -1199,7 +1199,7 @@ public void testDocValueFields() throws Exception { "keyword_field", "binary_field", "ip_field", - "flat_object_field" + "flat_object_field1.foo" ) ) ); @@ -1219,7 +1219,7 @@ public void testDocValueFields() throws Exception { assertThat(searchResponse.getHits().getAt(0).getFields().get("keyword_field").getValue(), equalTo("foo")); assertThat(searchResponse.getHits().getAt(0).getFields().get("binary_field").getValue(), equalTo("KmQ")); assertThat(searchResponse.getHits().getAt(0).getFields().get("ip_field").getValue(), equalTo("::1")); - assertThat(searchResponse.getHits().getAt(0).getFields().get("flat_object_field").getValue(), equalTo("flat_object_field.foo")); + assertThat(searchResponse.getHits().getAt(0).getFields().get("flat_object_field1.foo").getValue(), equalTo("bar")); builder = client().prepareSearch() .setQuery(matchAllQuery()) diff --git a/server/src/main/java/org/opensearch/index/mapper/DocValueFetcher.java b/server/src/main/java/org/opensearch/index/mapper/DocValueFetcher.java index 827792cdb1091..48da9b30ac1b0 100644 --- a/server/src/main/java/org/opensearch/index/mapper/DocValueFetcher.java +++ b/server/src/main/java/org/opensearch/index/mapper/DocValueFetcher.java @@ -43,6 +43,7 @@ import java.util.List; import static java.util.Collections.emptyList; +import static org.opensearch.index.mapper.FlatObjectFieldMapper.DOC_VALUE_NO_MATCH; /** * Value fetcher that loads from doc values. @@ -70,7 +71,10 @@ public List fetchValues(SourceLookup lookup) throws IOException { } List result = new ArrayList(leaf.docValueCount()); for (int i = 0, count = leaf.docValueCount(); i < count; ++i) { - result.add(leaf.nextValue()); + Object value = leaf.nextValue(); + if (value != DOC_VALUE_NO_MATCH) { + result.add(value); + } } return result; } diff --git a/server/src/main/java/org/opensearch/index/mapper/DocumentMapper.java b/server/src/main/java/org/opensearch/index/mapper/DocumentMapper.java index 3ada04b41abd2..dae1e23b94e7a 100644 --- a/server/src/main/java/org/opensearch/index/mapper/DocumentMapper.java +++ b/server/src/main/java/org/opensearch/index/mapper/DocumentMapper.java @@ -38,6 +38,7 @@ import org.apache.lucene.search.ScoreMode; import org.apache.lucene.search.Scorer; import org.apache.lucene.search.Weight; +import org.apache.lucene.util.BitSet; import org.apache.lucene.util.BytesRef; import org.opensearch.OpenSearchGenerationException; import org.opensearch.common.annotation.PublicApi; @@ -53,6 +54,8 @@ import org.opensearch.index.analysis.IndexAnalyzers; import org.opensearch.index.mapper.MapperService.MergeReason; import org.opensearch.index.mapper.MetadataFieldMapper.TypeParser; +import org.opensearch.index.query.NestedQueryBuilder; +import org.opensearch.search.fetch.subphase.InnerHitsContext; import org.opensearch.search.internal.SearchContext; import java.io.IOException; @@ -270,25 +273,15 @@ public ParsedDocument createNoopTombstoneDoc(String index, String reason) throws * Returns the best nested {@link ObjectMapper} instances that is in the scope of the specified nested docId. */ public ObjectMapper findNestedObjectMapper(int nestedDocId, SearchContext sc, LeafReaderContext context) throws IOException { + if (sc instanceof NestedQueryBuilder.NestedInnerHitSubContext) { + ObjectMapper objectMapper = ((NestedQueryBuilder.NestedInnerHitSubContext) sc).getChildObjectMapper(); + assert objectMappers().containsKey(objectMapper.fullPath()); + assert containSubDocIdWithObjectMapper(nestedDocId, objectMapper, sc, context); + return objectMapper; + } ObjectMapper nestedObjectMapper = null; for (ObjectMapper objectMapper : objectMappers().values()) { - if (!objectMapper.nested().isNested()) { - continue; - } - - Query filter = objectMapper.nestedTypeFilter(); - if (filter == null) { - continue; - } - // We can pass down 'null' as acceptedDocs, because nestedDocId is a doc to be fetched and - // therefore is guaranteed to be a live doc. - final Weight nestedWeight = filter.createWeight(sc.searcher(), ScoreMode.COMPLETE_NO_SCORES, 1f); - Scorer scorer = nestedWeight.scorer(context); - if (scorer == null) { - continue; - } - - if (scorer.iterator().advance(nestedDocId) == nestedDocId) { + if(containSubDocIdWithObjectMapper(nestedDocId, objectMapper, sc, context)) { if (nestedObjectMapper == null) { nestedObjectMapper = objectMapper; } else { @@ -301,6 +294,26 @@ public ObjectMapper findNestedObjectMapper(int nestedDocId, SearchContext sc, Le return nestedObjectMapper; } + private boolean containSubDocIdWithObjectMapper(int nestedDocId, ObjectMapper objectMapper, + SearchContext sc, LeafReaderContext context) throws IOException { + if (!objectMapper.nested().isNested()) { + return false; + } + Query filter = objectMapper.nestedTypeFilter(); + if (filter == null) { + return false; + } + // We can pass down 'null' as acceptedDocs, because nestedDocId is a doc to be fetched and + // therefore is guaranteed to be a live doc. + BitSet nesteDocIds = sc.bitsetFilterCache().getBitSetProducer(filter).getBitSet(context); + boolean result; + if (nesteDocIds != null && nesteDocIds.get(nestedDocId)) { + return true; + } else { + return false; + } + } + public DocumentMapper merge(Mapping mapping, MergeReason reason) { Mapping merged = this.mapping.merge(mapping, reason); return new DocumentMapper(mapperService, merged); diff --git a/server/src/main/java/org/opensearch/index/mapper/FlatObjectFieldMapper.java b/server/src/main/java/org/opensearch/index/mapper/FlatObjectFieldMapper.java index 0ccdb40f9d33a..13063a4761006 100644 --- a/server/src/main/java/org/opensearch/index/mapper/FlatObjectFieldMapper.java +++ b/server/src/main/java/org/opensearch/index/mapper/FlatObjectFieldMapper.java @@ -28,6 +28,7 @@ import org.opensearch.common.unit.Fuzziness; import org.opensearch.common.xcontent.JsonToStringXContentParser; import org.opensearch.core.common.ParsingException; +import org.opensearch.core.common.io.stream.StreamOutput; import org.opensearch.core.xcontent.DeprecationHandler; import org.opensearch.core.xcontent.NamedXContentRegistry; import org.opensearch.core.xcontent.XContentParser; @@ -36,11 +37,13 @@ import org.opensearch.index.fielddata.plain.SortedSetOrdinalsIndexFieldData; import org.opensearch.index.mapper.KeywordFieldMapper.KeywordFieldType; import org.opensearch.index.query.QueryShardContext; +import org.opensearch.search.DocValueFormat; import org.opensearch.search.aggregations.support.CoreValuesSourceType; import org.opensearch.search.lookup.SearchLookup; import java.io.IOException; import java.io.UncheckedIOException; +import java.time.ZoneId; import java.util.ArrayList; import java.util.Collections; import java.util.Iterator; @@ -63,6 +66,7 @@ public final class FlatObjectFieldMapper extends DynamicKeyFieldMapper { public static final String CONTENT_TYPE = "flat_object"; + public static final Object DOC_VALUE_NO_MATCH = new Object(); /** * In flat_object field mapper, field type is similar to keyword field type @@ -272,7 +276,7 @@ NamedAnalyzer normalizer() { @Override public IndexFieldData.Builder fielddataBuilder(String fullyQualifiedIndexName, Supplier searchLookup) { failIfNoDocValues(); - return new SortedSetOrdinalsIndexFieldData.Builder(name(), CoreValuesSourceType.BYTES); + return new SortedSetOrdinalsIndexFieldData.Builder(valueFieldType().name(), CoreValuesSourceType.BYTES); } @Override @@ -304,6 +308,30 @@ protected String parseSourceValue(Object value) { }; } + @Override + public DocValueFormat docValueFormat(@Nullable String format, ZoneId timeZone) { + if (format != null) { + throw new IllegalArgumentException("Field [" + name() + "] of type [" + typeName() + "] does not support custom formats"); + } + if (timeZone != null) { + throw new IllegalArgumentException( + "Field [" + name() + "] of type [" + typeName() + "] does not support custom time zones" + ); + } + if (mappedFieldTypeName != null) { + return new FlatObjectDocValueFormat(mappedFieldTypeName + DOT_SYMBOL + name() + EQUAL_SYMBOL); + } else { + throw new IllegalArgumentException( + "Field [" + name() + "] of type [" + typeName() + "] does not support doc_value in root field" + ); + } + } + + @Override + public boolean isAggregatable() { + return false; + } + @Override public Object valueForDisplay(Object value) { if (value == null) { @@ -530,6 +558,39 @@ public Query wildcardQuery( return valueFieldType().wildcardQuery(rewriteValue(value), method, caseInsensitve, context); } + /** + * A doc_value formatter for flat_object field. + */ + public class FlatObjectDocValueFormat implements DocValueFormat { + private static final String NAME = "flat_object"; + private final String prefix; + + public FlatObjectDocValueFormat(String prefix) { + this.prefix = prefix; + } + + @Override + public String getWriteableName() { + return NAME; + } + + @Override + public void writeTo(StreamOutput out) {} + + @Override + public Object format(BytesRef value) { + String parsedValue = inputToString(value); + if (parsedValue.startsWith(prefix) == false) { + return DOC_VALUE_NO_MATCH; + } + return parsedValue.substring(prefix.length()); + } + + @Override + public BytesRef parseBytesRef(String value) { + return new BytesRef((String) valueFieldType.rewriteForDocValue(rewriteValue(value))); + } + } } private final ValueFieldMapper valueFieldMapper; diff --git a/server/src/main/java/org/opensearch/index/query/NestedQueryBuilder.java b/server/src/main/java/org/opensearch/index/query/NestedQueryBuilder.java index ec7e62035a82f..6cf34c5b7db30 100644 --- a/server/src/main/java/org/opensearch/index/query/NestedQueryBuilder.java +++ b/server/src/main/java/org/opensearch/index/query/NestedQueryBuilder.java @@ -438,7 +438,7 @@ protected void doBuild(SearchContext parentSearchContext, InnerHitsContext inner * * @opensearch.internal */ - static final class NestedInnerHitSubContext extends InnerHitsContext.InnerHitSubContext { + public static final class NestedInnerHitSubContext extends InnerHitsContext.InnerHitSubContext { private final ObjectMapper parentObjectMapper; private final ObjectMapper childObjectMapper; @@ -507,6 +507,10 @@ public TopDocsAndMaxScore topDocs(SearchHit hit) throws IOException { return new TopDocsAndMaxScore(td, maxScore); } } + + public ObjectMapper getChildObjectMapper() { + return childObjectMapper; + } } @Override diff --git a/server/src/main/java/org/opensearch/search/fetch/FetchPhase.java b/server/src/main/java/org/opensearch/search/fetch/FetchPhase.java index 1698f41caaf2b..e52aa3443fecd 100644 --- a/server/src/main/java/org/opensearch/search/fetch/FetchPhase.java +++ b/server/src/main/java/org/opensearch/search/fetch/FetchPhase.java @@ -501,7 +501,6 @@ private SearchHit.NestedIdentity getInternalNestedIdentity( ObjectMapper current = nestedObjectMapper; String originalName = nestedObjectMapper.name(); SearchHit.NestedIdentity nestedIdentity = null; - final IndexSettings indexSettings = context.getQueryShardContext().getIndexSettings(); do { Query parentFilter; nestedParentObjectMapper = current.getParentObjectMapper(mapperService); @@ -520,14 +519,11 @@ private SearchHit.NestedIdentity getInternalNestedIdentity( current = nestedParentObjectMapper; continue; } - final Weight childWeight = context.searcher() - .createWeight(context.searcher().rewrite(childFilter), ScoreMode.COMPLETE_NO_SCORES, 1f); - Scorer childScorer = childWeight.scorer(subReaderContext); - if (childScorer == null) { + BitSet childIter = context.bitsetFilterCache().getBitSetProducer(context.searcher().rewrite(childFilter)).getBitSet(subReaderContext); + if (childIter == null) { current = nestedParentObjectMapper; continue; } - DocIdSetIterator childIter = childScorer.iterator(); BitSet parentBits = context.bitsetFilterCache().getBitSetProducer(parentFilter).getBitSet(subReaderContext); @@ -541,8 +537,8 @@ private SearchHit.NestedIdentity getInternalNestedIdentity( * that appear before him. */ int previousParent = parentBits.prevSetBit(currentParent); - for (int docId = childIter.advance(previousParent + 1); docId < nestedSubDocId - && docId != DocIdSetIterator.NO_MORE_DOCS; docId = childIter.nextDoc()) { + for (int docId = childIter.nextSetBit(previousParent + 1); docId < nestedSubDocId + && docId != DocIdSetIterator.NO_MORE_DOCS; docId = childIter.nextSetBit(docId + 1)) { offset++; } currentParent = nestedSubDocId; diff --git a/server/src/test/java/org/opensearch/index/mapper/FlatObjectFieldMapperTests.java b/server/src/test/java/org/opensearch/index/mapper/FlatObjectFieldMapperTests.java index afd9e994ce3ae..7e6aa00c87290 100644 --- a/server/src/test/java/org/opensearch/index/mapper/FlatObjectFieldMapperTests.java +++ b/server/src/test/java/org/opensearch/index/mapper/FlatObjectFieldMapperTests.java @@ -21,6 +21,7 @@ import org.opensearch.core.xcontent.ToXContent; import org.opensearch.core.xcontent.XContentBuilder; import org.opensearch.index.query.QueryShardContext; +import org.opensearch.search.DocValueFormat; import java.io.IOException; @@ -397,6 +398,27 @@ public void testDeduplicationValue() throws IOException { assertEquals(new BytesRef("field.labels=3"), fieldValueAndPaths[4].binaryValue()); } + public void testFetchDocValues() throws IOException { + MapperService mapperService = createMapperService(fieldMapping(b -> b.field("type", "flat_object"))); + { + // test valueWithPathField + MappedFieldType ft = mapperService.fieldType("field.name"); + DocValueFormat format = ft.docValueFormat(null, null); + String storedValue = "field.field.name=1234"; + + Object object = format.format(new BytesRef(storedValue)); + assertEquals("1234", object); + } + + { + // test valueField + MappedFieldType ft = mapperService.fieldType("field"); + Throwable throwable = assertThrows(IllegalArgumentException.class, () -> ft.docValueFormat(null, null)); + assertEquals("Field [field] of type [flat_object] does not support doc_value in root field", throwable.getMessage()); + } + + } + @Override protected void registerParameters(ParameterChecker checker) throws IOException { // In the future we will want to make sure parameter updates are covered.