Skip to content

Commit

Permalink
Retrieve value from DocValues in a flat_object filed (opensearch-proj…
Browse files Browse the repository at this point in the history
…ect#16802)

optimize innerhit phase
  • Loading branch information
kkewwei committed Dec 29, 2024
1 parent 54ae54a commit e301066
Show file tree
Hide file tree
Showing 9 changed files with 189 additions and 42 deletions.
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
- Added a precaution to handle extreme date values during sorting to prevent `arithmetic_exception: long overflow` ([#16812](https://github.com/opensearch-project/OpenSearch/pull/16812)).
- Add search replica stats to segment replication stats API ([#16678](https://github.com/opensearch-project/OpenSearch/pull/16678))
- Introduce a setting to disable download of full cluster state from remote on term mismatch([#16798](https://github.com/opensearch-project/OpenSearch/pull/16798/))
- Added ability to retrieve value from DocValues in a flat_object filed([#16802](https://github.com/opensearch-project/OpenSearch/pull/16802))

### Dependencies
- Bump `com.google.cloud:google-cloud-core-http` from 2.23.0 to 2.47.0 ([#16504](https://github.com/opensearch-project/OpenSearch/pull/16504))
Expand Down
Original file line number Diff line number Diff line change
@@ -1,8 +1,9 @@
---
# The test setup includes:
# - Create flat_object mapping for flat_object_doc_values_test index
# - Index 9 example documents
# - Search tests about doc_values and index
# - 1.Create flat_object mapping for flat_object_doc_values_test index
# - 2.Index 9 example documents
# - 3.Search tests about doc_values and index
# - 4.Fetch doc_value from flat_object field

setup:
- skip:
Expand Down Expand Up @@ -786,3 +787,48 @@ teardown:

- length: { hits.hits: 1 }
- match: { hits.hits.0._source.order: "order8" }

# Stored Fields with exact dot path.
- do:
search:
body: {
_source: false,
query: {
bool: {
must: [
{
term: {
order: "order0"
}
}
]
}
},
stored_fields: "_none_",
docvalue_fields: [ "issue.labels.name","order" ]
}

- length: { hits.hits: 1 }
- match: { hits.hits.0.fields: { "order" : [ "order0" ], "issue.labels.name": [ "abc0" ] } }

- do:
search:
body: {
_source: false,
query: {
bool: {
must: [
{
term: {
order: "order0"
}
}
]
}
},
stored_fields: "_none_",
docvalue_fields: [ "issue.labels.name" ]
}

- length: { hits.hits: 1 }
- match: { hits.hits.0.fields: { "issue.labels.name": [ "abc0" ] } }
Original file line number Diff line number Diff line change
Expand Up @@ -1023,7 +1023,7 @@ public void testDocValueFields() throws Exception {
.startObject("ip_field")
.field("type", "ip")
.endObject()
.startObject("flat_object_field")
.startObject("flat_object_field1")
.field("type", "flat_object")
.endObject()
.endObject()
Expand All @@ -1050,9 +1050,11 @@ public void testDocValueFields() throws Exception {
.field("boolean_field", true)
.field("binary_field", new byte[] { 42, 100 })
.field("ip_field", "::1")
.field("flat_object_field")
.field("flat_object_field1")
.startObject()
.field("fooa", "bara")
.field("foo", "bar")
.field("foob", "barb")
.endObject()
.endObject()
)
Expand All @@ -1075,7 +1077,7 @@ public void testDocValueFields() throws Exception {
.addDocValueField("boolean_field")
.addDocValueField("binary_field")
.addDocValueField("ip_field")
.addDocValueField("flat_object_field");
.addDocValueField("flat_object_field1.foo");
SearchResponse searchResponse = builder.get();

assertThat(searchResponse.getHits().getTotalHits().value, equalTo(1L));
Expand All @@ -1097,7 +1099,7 @@ public void testDocValueFields() throws Exception {
"keyword_field",
"binary_field",
"ip_field",
"flat_object_field"
"flat_object_field1.foo"
)
)
);
Expand All @@ -1116,7 +1118,7 @@ public void testDocValueFields() throws Exception {
assertThat(searchResponse.getHits().getAt(0).getFields().get("keyword_field").getValue(), equalTo("foo"));
assertThat(searchResponse.getHits().getAt(0).getFields().get("binary_field").getValue(), equalTo("KmQ"));
assertThat(searchResponse.getHits().getAt(0).getFields().get("ip_field").getValue(), equalTo("::1"));
assertThat(searchResponse.getHits().getAt(0).getFields().get("flat_object_field").getValue(), equalTo("flat_object_field.foo"));
assertThat(searchResponse.getHits().getAt(0).getFields().get("flat_object_field1.foo").getValue(), equalTo("bar"));

builder = client().prepareSearch().setQuery(matchAllQuery()).addDocValueField("*field");
searchResponse = builder.get();
Expand All @@ -1139,8 +1141,7 @@ public void testDocValueFields() throws Exception {
"text_field",
"keyword_field",
"binary_field",
"ip_field",
"flat_object_field"
"ip_field"
)
)
);
Expand All @@ -1160,7 +1161,6 @@ public void testDocValueFields() throws Exception {
assertThat(searchResponse.getHits().getAt(0).getFields().get("keyword_field").getValue(), equalTo("foo"));
assertThat(searchResponse.getHits().getAt(0).getFields().get("binary_field").getValue(), equalTo("KmQ"));
assertThat(searchResponse.getHits().getAt(0).getFields().get("ip_field").getValue(), equalTo("::1"));
assertThat(searchResponse.getHits().getAt(0).getFields().get("flat_object_field").getValue(), equalTo("flat_object_field.foo"));

builder = client().prepareSearch()
.setQuery(matchAllQuery())
Expand All @@ -1176,7 +1176,7 @@ public void testDocValueFields() throws Exception {
.addDocValueField("boolean_field", "use_field_mapping")
.addDocValueField("binary_field", "use_field_mapping")
.addDocValueField("ip_field", "use_field_mapping")
.addDocValueField("flat_object_field", "use_field_mapping");
.addDocValueField("flat_object_field1.foo", null);
;
searchResponse = builder.get();

Expand All @@ -1199,7 +1199,7 @@ public void testDocValueFields() throws Exception {
"keyword_field",
"binary_field",
"ip_field",
"flat_object_field"
"flat_object_field1.foo"
)
)
);
Expand All @@ -1219,7 +1219,7 @@ public void testDocValueFields() throws Exception {
assertThat(searchResponse.getHits().getAt(0).getFields().get("keyword_field").getValue(), equalTo("foo"));
assertThat(searchResponse.getHits().getAt(0).getFields().get("binary_field").getValue(), equalTo("KmQ"));
assertThat(searchResponse.getHits().getAt(0).getFields().get("ip_field").getValue(), equalTo("::1"));
assertThat(searchResponse.getHits().getAt(0).getFields().get("flat_object_field").getValue(), equalTo("flat_object_field.foo"));
assertThat(searchResponse.getHits().getAt(0).getFields().get("flat_object_field1.foo").getValue(), equalTo("bar"));

builder = client().prepareSearch()
.setQuery(matchAllQuery())
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,7 @@
import java.util.List;

import static java.util.Collections.emptyList;
import static org.opensearch.index.mapper.FlatObjectFieldMapper.DOC_VALUE_NO_MATCH;

/**
* Value fetcher that loads from doc values.
Expand Down Expand Up @@ -70,7 +71,10 @@ public List<Object> fetchValues(SourceLookup lookup) throws IOException {
}
List<Object> result = new ArrayList<Object>(leaf.docValueCount());
for (int i = 0, count = leaf.docValueCount(); i < count; ++i) {
result.add(leaf.nextValue());
Object value = leaf.nextValue();
if (value != DOC_VALUE_NO_MATCH) {
result.add(value);
}
}
return result;
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,7 @@
import org.apache.lucene.search.ScoreMode;
import org.apache.lucene.search.Scorer;
import org.apache.lucene.search.Weight;
import org.apache.lucene.util.BitSet;
import org.apache.lucene.util.BytesRef;
import org.opensearch.OpenSearchGenerationException;
import org.opensearch.common.annotation.PublicApi;
Expand All @@ -53,6 +54,8 @@
import org.opensearch.index.analysis.IndexAnalyzers;
import org.opensearch.index.mapper.MapperService.MergeReason;
import org.opensearch.index.mapper.MetadataFieldMapper.TypeParser;
import org.opensearch.index.query.NestedQueryBuilder;
import org.opensearch.search.fetch.subphase.InnerHitsContext;
import org.opensearch.search.internal.SearchContext;

import java.io.IOException;
Expand Down Expand Up @@ -270,25 +273,15 @@ public ParsedDocument createNoopTombstoneDoc(String index, String reason) throws
* Returns the best nested {@link ObjectMapper} instances that is in the scope of the specified nested docId.
*/
public ObjectMapper findNestedObjectMapper(int nestedDocId, SearchContext sc, LeafReaderContext context) throws IOException {
if (sc instanceof NestedQueryBuilder.NestedInnerHitSubContext) {
ObjectMapper objectMapper = ((NestedQueryBuilder.NestedInnerHitSubContext) sc).getChildObjectMapper();
assert objectMappers().containsKey(objectMapper.fullPath());
assert containSubDocIdWithObjectMapper(nestedDocId, objectMapper, sc, context);
return objectMapper;
}
ObjectMapper nestedObjectMapper = null;
for (ObjectMapper objectMapper : objectMappers().values()) {
if (!objectMapper.nested().isNested()) {
continue;
}

Query filter = objectMapper.nestedTypeFilter();
if (filter == null) {
continue;
}
// We can pass down 'null' as acceptedDocs, because nestedDocId is a doc to be fetched and
// therefore is guaranteed to be a live doc.
final Weight nestedWeight = filter.createWeight(sc.searcher(), ScoreMode.COMPLETE_NO_SCORES, 1f);
Scorer scorer = nestedWeight.scorer(context);
if (scorer == null) {
continue;
}

if (scorer.iterator().advance(nestedDocId) == nestedDocId) {
if(containSubDocIdWithObjectMapper(nestedDocId, objectMapper, sc, context)) {
if (nestedObjectMapper == null) {
nestedObjectMapper = objectMapper;
} else {
Expand All @@ -301,6 +294,26 @@ public ObjectMapper findNestedObjectMapper(int nestedDocId, SearchContext sc, Le
return nestedObjectMapper;
}

private boolean containSubDocIdWithObjectMapper(int nestedDocId, ObjectMapper objectMapper,
SearchContext sc, LeafReaderContext context) throws IOException {
if (!objectMapper.nested().isNested()) {
return false;
}
Query filter = objectMapper.nestedTypeFilter();
if (filter == null) {
return false;
}
// We can pass down 'null' as acceptedDocs, because nestedDocId is a doc to be fetched and
// therefore is guaranteed to be a live doc.
BitSet nesteDocIds = sc.bitsetFilterCache().getBitSetProducer(filter).getBitSet(context);
boolean result;
if (nesteDocIds != null && nesteDocIds.get(nestedDocId)) {
return true;
} else {
return false;
}
}

public DocumentMapper merge(Mapping mapping, MergeReason reason) {
Mapping merged = this.mapping.merge(mapping, reason);
return new DocumentMapper(mapperService, merged);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@
import org.opensearch.common.unit.Fuzziness;
import org.opensearch.common.xcontent.JsonToStringXContentParser;
import org.opensearch.core.common.ParsingException;
import org.opensearch.core.common.io.stream.StreamOutput;
import org.opensearch.core.xcontent.DeprecationHandler;
import org.opensearch.core.xcontent.NamedXContentRegistry;
import org.opensearch.core.xcontent.XContentParser;
Expand All @@ -36,11 +37,13 @@
import org.opensearch.index.fielddata.plain.SortedSetOrdinalsIndexFieldData;
import org.opensearch.index.mapper.KeywordFieldMapper.KeywordFieldType;
import org.opensearch.index.query.QueryShardContext;
import org.opensearch.search.DocValueFormat;
import org.opensearch.search.aggregations.support.CoreValuesSourceType;
import org.opensearch.search.lookup.SearchLookup;

import java.io.IOException;
import java.io.UncheckedIOException;
import java.time.ZoneId;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Iterator;
Expand All @@ -63,6 +66,7 @@
public final class FlatObjectFieldMapper extends DynamicKeyFieldMapper {

public static final String CONTENT_TYPE = "flat_object";
public static final Object DOC_VALUE_NO_MATCH = new Object();

/**
* In flat_object field mapper, field type is similar to keyword field type
Expand Down Expand Up @@ -272,7 +276,7 @@ NamedAnalyzer normalizer() {
@Override
public IndexFieldData.Builder fielddataBuilder(String fullyQualifiedIndexName, Supplier<SearchLookup> searchLookup) {
failIfNoDocValues();
return new SortedSetOrdinalsIndexFieldData.Builder(name(), CoreValuesSourceType.BYTES);
return new SortedSetOrdinalsIndexFieldData.Builder(valueFieldType().name(), CoreValuesSourceType.BYTES);
}

@Override
Expand Down Expand Up @@ -304,6 +308,30 @@ protected String parseSourceValue(Object value) {
};
}

@Override
public DocValueFormat docValueFormat(@Nullable String format, ZoneId timeZone) {
if (format != null) {
throw new IllegalArgumentException("Field [" + name() + "] of type [" + typeName() + "] does not support custom formats");
}
if (timeZone != null) {
throw new IllegalArgumentException(
"Field [" + name() + "] of type [" + typeName() + "] does not support custom time zones"
);
}
if (mappedFieldTypeName != null) {
return new FlatObjectDocValueFormat(mappedFieldTypeName + DOT_SYMBOL + name() + EQUAL_SYMBOL);
} else {
throw new IllegalArgumentException(
"Field [" + name() + "] of type [" + typeName() + "] does not support doc_value in root field"
);
}
}

@Override
public boolean isAggregatable() {
return false;
}

@Override
public Object valueForDisplay(Object value) {
if (value == null) {
Expand Down Expand Up @@ -530,6 +558,39 @@ public Query wildcardQuery(
return valueFieldType().wildcardQuery(rewriteValue(value), method, caseInsensitve, context);
}

/**
* A doc_value formatter for flat_object field.
*/
public class FlatObjectDocValueFormat implements DocValueFormat {
private static final String NAME = "flat_object";
private final String prefix;

public FlatObjectDocValueFormat(String prefix) {
this.prefix = prefix;
}

@Override
public String getWriteableName() {
return NAME;
}

@Override
public void writeTo(StreamOutput out) {}

@Override
public Object format(BytesRef value) {
String parsedValue = inputToString(value);
if (parsedValue.startsWith(prefix) == false) {
return DOC_VALUE_NO_MATCH;
}
return parsedValue.substring(prefix.length());
}

@Override
public BytesRef parseBytesRef(String value) {
return new BytesRef((String) valueFieldType.rewriteForDocValue(rewriteValue(value)));
}
}
}

private final ValueFieldMapper valueFieldMapper;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -438,7 +438,7 @@ protected void doBuild(SearchContext parentSearchContext, InnerHitsContext inner
*
* @opensearch.internal
*/
static final class NestedInnerHitSubContext extends InnerHitsContext.InnerHitSubContext {
public static final class NestedInnerHitSubContext extends InnerHitsContext.InnerHitSubContext {

private final ObjectMapper parentObjectMapper;
private final ObjectMapper childObjectMapper;
Expand Down Expand Up @@ -507,6 +507,10 @@ public TopDocsAndMaxScore topDocs(SearchHit hit) throws IOException {
return new TopDocsAndMaxScore(td, maxScore);
}
}

public ObjectMapper getChildObjectMapper() {
return childObjectMapper;
}
}

@Override
Expand Down
Loading

0 comments on commit e301066

Please sign in to comment.