diff --git a/CHANGELOG.md b/CHANGELOG.md index 45bc56b505eb3..2edbadaad2f5d 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -59,6 +59,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), ### Changed - Indexed IP field supports `terms_query` with more than 1025 IP masks [#16391](https://github.com/opensearch-project/OpenSearch/pull/16391) - Make entries for dependencies from server/build.gradle to gradle version catalog ([#16707](https://github.com/opensearch-project/OpenSearch/pull/16707)) +- Optimize innerhits query performance [#16937](https://github.com/opensearch-project/OpenSearch/pull/16937) ### Deprecated - Performing update operation with default pipeline or final pipeline is deprecated ([#16712](https://github.com/opensearch-project/OpenSearch/pull/16712)) diff --git a/server/src/main/java/org/opensearch/index/mapper/DocumentMapper.java b/server/src/main/java/org/opensearch/index/mapper/DocumentMapper.java index 3ada04b41abd2..694e68ba107ed 100644 --- a/server/src/main/java/org/opensearch/index/mapper/DocumentMapper.java +++ b/server/src/main/java/org/opensearch/index/mapper/DocumentMapper.java @@ -35,9 +35,7 @@ import org.apache.lucene.document.StoredField; import org.apache.lucene.index.LeafReaderContext; import org.apache.lucene.search.Query; -import org.apache.lucene.search.ScoreMode; -import org.apache.lucene.search.Scorer; -import org.apache.lucene.search.Weight; +import org.apache.lucene.util.BitSet; import org.apache.lucene.util.BytesRef; import org.opensearch.OpenSearchGenerationException; import org.opensearch.common.annotation.PublicApi; @@ -53,6 +51,7 @@ import org.opensearch.index.analysis.IndexAnalyzers; import org.opensearch.index.mapper.MapperService.MergeReason; import org.opensearch.index.mapper.MetadataFieldMapper.TypeParser; +import org.opensearch.index.query.NestedQueryBuilder; import org.opensearch.search.internal.SearchContext; import java.io.IOException; @@ -270,25 +269,15 @@ public ParsedDocument createNoopTombstoneDoc(String index, String reason) throws * Returns the best nested {@link ObjectMapper} instances that is in the scope of the specified nested docId. */ public ObjectMapper findNestedObjectMapper(int nestedDocId, SearchContext sc, LeafReaderContext context) throws IOException { + if (sc instanceof NestedQueryBuilder.NestedInnerHitSubContext) { + ObjectMapper objectMapper = ((NestedQueryBuilder.NestedInnerHitSubContext) sc).getChildObjectMapper(); + assert objectMappers().containsKey(objectMapper.fullPath()); + assert containSubDocIdWithObjectMapper(nestedDocId, objectMapper, sc, context); + return objectMapper; + } ObjectMapper nestedObjectMapper = null; for (ObjectMapper objectMapper : objectMappers().values()) { - if (!objectMapper.nested().isNested()) { - continue; - } - - Query filter = objectMapper.nestedTypeFilter(); - if (filter == null) { - continue; - } - // We can pass down 'null' as acceptedDocs, because nestedDocId is a doc to be fetched and - // therefore is guaranteed to be a live doc. - final Weight nestedWeight = filter.createWeight(sc.searcher(), ScoreMode.COMPLETE_NO_SCORES, 1f); - Scorer scorer = nestedWeight.scorer(context); - if (scorer == null) { - continue; - } - - if (scorer.iterator().advance(nestedDocId) == nestedDocId) { + if (containSubDocIdWithObjectMapper(nestedDocId, objectMapper, sc, context)) { if (nestedObjectMapper == null) { nestedObjectMapper = objectMapper; } else { @@ -301,6 +290,26 @@ public ObjectMapper findNestedObjectMapper(int nestedDocId, SearchContext sc, Le return nestedObjectMapper; } + private boolean containSubDocIdWithObjectMapper(int nestedDocId, ObjectMapper objectMapper, SearchContext sc, LeafReaderContext context) + throws IOException { + if (!objectMapper.nested().isNested()) { + return false; + } + Query filter = objectMapper.nestedTypeFilter(); + if (filter == null) { + return false; + } + // We can pass down 'null' as acceptedDocs, because nestedDocId is a doc to be fetched and + // therefore is guaranteed to be a live doc. + BitSet nesteDocIds = sc.bitsetFilterCache().getBitSetProducer(filter).getBitSet(context); + boolean result; + if (nesteDocIds != null && nesteDocIds.get(nestedDocId)) { + return true; + } else { + return false; + } + } + public DocumentMapper merge(Mapping mapping, MergeReason reason) { Mapping merged = this.mapping.merge(mapping, reason); return new DocumentMapper(mapperService, merged); diff --git a/server/src/main/java/org/opensearch/index/query/NestedQueryBuilder.java b/server/src/main/java/org/opensearch/index/query/NestedQueryBuilder.java index ec7e62035a82f..6cf34c5b7db30 100644 --- a/server/src/main/java/org/opensearch/index/query/NestedQueryBuilder.java +++ b/server/src/main/java/org/opensearch/index/query/NestedQueryBuilder.java @@ -438,7 +438,7 @@ protected void doBuild(SearchContext parentSearchContext, InnerHitsContext inner * * @opensearch.internal */ - static final class NestedInnerHitSubContext extends InnerHitsContext.InnerHitSubContext { + public static final class NestedInnerHitSubContext extends InnerHitsContext.InnerHitSubContext { private final ObjectMapper parentObjectMapper; private final ObjectMapper childObjectMapper; @@ -507,6 +507,10 @@ public TopDocsAndMaxScore topDocs(SearchHit hit) throws IOException { return new TopDocsAndMaxScore(td, maxScore); } } + + public ObjectMapper getChildObjectMapper() { + return childObjectMapper; + } } @Override diff --git a/server/src/main/java/org/opensearch/search/fetch/FetchPhase.java b/server/src/main/java/org/opensearch/search/fetch/FetchPhase.java index 1698f41caaf2b..11a1b9a97235b 100644 --- a/server/src/main/java/org/opensearch/search/fetch/FetchPhase.java +++ b/server/src/main/java/org/opensearch/search/fetch/FetchPhase.java @@ -38,10 +38,7 @@ import org.apache.lucene.index.ReaderUtil; import org.apache.lucene.search.DocIdSetIterator; import org.apache.lucene.search.Query; -import org.apache.lucene.search.ScoreMode; -import org.apache.lucene.search.Scorer; import org.apache.lucene.search.TotalHits; -import org.apache.lucene.search.Weight; import org.apache.lucene.util.BitSet; import org.opensearch.common.CheckedBiConsumer; import org.opensearch.common.annotation.PublicApi; @@ -55,7 +52,6 @@ import org.opensearch.core.common.text.Text; import org.opensearch.core.tasks.TaskCancelledException; import org.opensearch.core.xcontent.MediaType; -import org.opensearch.index.IndexSettings; import org.opensearch.index.fieldvisitor.CustomFieldsVisitor; import org.opensearch.index.fieldvisitor.FieldsVisitor; import org.opensearch.index.mapper.DocumentMapper; @@ -501,7 +497,6 @@ private SearchHit.NestedIdentity getInternalNestedIdentity( ObjectMapper current = nestedObjectMapper; String originalName = nestedObjectMapper.name(); SearchHit.NestedIdentity nestedIdentity = null; - final IndexSettings indexSettings = context.getQueryShardContext().getIndexSettings(); do { Query parentFilter; nestedParentObjectMapper = current.getParentObjectMapper(mapperService); @@ -520,14 +515,13 @@ private SearchHit.NestedIdentity getInternalNestedIdentity( current = nestedParentObjectMapper; continue; } - final Weight childWeight = context.searcher() - .createWeight(context.searcher().rewrite(childFilter), ScoreMode.COMPLETE_NO_SCORES, 1f); - Scorer childScorer = childWeight.scorer(subReaderContext); - if (childScorer == null) { + BitSet childIter = context.bitsetFilterCache() + .getBitSetProducer(context.searcher().rewrite(childFilter)) + .getBitSet(subReaderContext); + if (childIter == null) { current = nestedParentObjectMapper; continue; } - DocIdSetIterator childIter = childScorer.iterator(); BitSet parentBits = context.bitsetFilterCache().getBitSetProducer(parentFilter).getBitSet(subReaderContext); @@ -541,8 +535,8 @@ private SearchHit.NestedIdentity getInternalNestedIdentity( * that appear before him. */ int previousParent = parentBits.prevSetBit(currentParent); - for (int docId = childIter.advance(previousParent + 1); docId < nestedSubDocId - && docId != DocIdSetIterator.NO_MORE_DOCS; docId = childIter.nextDoc()) { + for (int docId = childIter.nextSetBit(previousParent + 1); docId < nestedSubDocId + && docId != DocIdSetIterator.NO_MORE_DOCS; docId = childIter.nextSetBit(docId + 1)) { offset++; } currentParent = nestedSubDocId;