[8.x] Enable _tier based coordinator rewrites for all indices (not just mounted indices) (#115797) (#116076)

* Enable _tier based coordinator rewrites for all indices (not just mounted indices) (#115797)

As part of #114990 we enabled using the `_tier` field as part of the coordinator
rewrite in order to skip shards that do not match a `_tier` filter, but only for
fully/partially mounted indices.

This PR extends that work by allowing a coordinator rewrite to skip shards that
will not match the `_tier` query for all indices, irrespective of their lifecycle
state (i.e. hot and warm indices can now skip shards based on the `_tier` query).
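For illustration (a minimal sketch, not part of this change), this is the kind of `_tier` filter that can now be rewritten on the coordinator for any index; it mirrors the query used in the test added below:

```java
import org.elasticsearch.index.query.BoolQueryBuilder;
import org.elasticsearch.index.query.QueryBuilders;

// A bool query whose filter clause targets the synthetic _tier field; shards on
// indices whose tier cannot match "data_hot" can be skipped before the query phase.
BoolQueryBuilder tierFilter = QueryBuilders.boolQuery()
    .filter(QueryBuilders.termQuery("_tier", "data_hot"));
```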

Note, however, that hot/warm indices will not automatically take advantage of the
`can_match` coordinator rewrite (as read-only indices do); only search requests
that surpass the `pre_filter_shard_size` threshold will.
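A sketch of how a request could opt in (the index pattern and threshold value are illustrative only): lowering `pre_filter_shard_size` forces the `can_match` pre-filter phase, and with it this coordinator rewrite, to run even for small requests.

```java
import org.elasticsearch.action.search.SearchRequest;
import org.elasticsearch.index.query.QueryBuilders;
import org.elasticsearch.search.builder.SearchSourceBuilder;

SearchRequest request = new SearchRequest("my-index-*")   // hypothetical index pattern
    .source(
        new SearchSourceBuilder().query(
            QueryBuilders.boolQuery().filter(QueryBuilders.termQuery("_tier", "data_hot"))
        )
    );
// Lower the threshold so the can_match pre-filter phase runs even with few shards.
request.setPreFilterShardSize(1);
```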

Relates to #114910

(cherry picked from commit 71dfb06)
Signed-off-by: Andrei Dan <[email protected]>

* Fix test compilation

---------

Co-authored-by: Elastic Machine <[email protected]>
andreidan and elasticmachine authored Nov 1, 2024
1 parent c5457b3 commit d0eb5a0
Showing 6 changed files with 258 additions and 39 deletions.
6 changes: 6 additions & 0 deletions docs/changelog/115797.yaml
@@ -0,0 +1,6 @@
pr: 115797
summary: Enable `_tier` based coordinator rewrites for all indices (not just mounted
  indices)
area: Search
type: enhancement
issues: []
@@ -39,7 +39,7 @@ public class CoordinatorRewriteContext extends QueryRewriteContext {

public static final String TIER_FIELD_NAME = "_tier";

private static final ConstantFieldType TIER_FIELD_TYPE = new ConstantFieldType(TIER_FIELD_NAME, Map.of()) {
static final ConstantFieldType TIER_FIELD_TYPE = new ConstantFieldType(TIER_FIELD_NAME, Map.of()) {
@Override
public ValueFetcher valueFetcher(SearchExecutionContext context, String format) {
throw new UnsupportedOperationException("fetching field values is not supported on the coordinator node");
@@ -69,6 +69,7 @@ public Query existsQuery(SearchExecutionContext context) {
}
};

@Nullable
private final DateFieldRangeInfo dateFieldRangeInfo;
private final String tier;

@@ -85,7 +86,7 @@ public CoordinatorRewriteContext(
XContentParserConfiguration parserConfig,
Client client,
LongSupplier nowInMillis,
DateFieldRangeInfo dateFieldRangeInfo,
@Nullable DateFieldRangeInfo dateFieldRangeInfo,
String tier
) {
super(
@@ -116,9 +117,9 @@ public CoordinatorRewriteContext(
*/
@Nullable
public MappedFieldType getFieldType(String fieldName) {
if (DataStream.TIMESTAMP_FIELD_NAME.equals(fieldName)) {
if (dateFieldRangeInfo != null && DataStream.TIMESTAMP_FIELD_NAME.equals(fieldName)) {
return dateFieldRangeInfo.timestampFieldType();
} else if (IndexMetadata.EVENT_INGESTED_FIELD_NAME.equals(fieldName)) {
} else if (dateFieldRangeInfo != null && IndexMetadata.EVENT_INGESTED_FIELD_NAME.equals(fieldName)) {
return dateFieldRangeInfo.eventIngestedFieldType();
} else if (TIER_FIELD_NAME.equals(fieldName)) {
return TIER_FIELD_TYPE;
@@ -133,9 +134,9 @@ public MappedFieldType getFieldType(String fieldName) {
*/
@Nullable
public IndexLongFieldRange getFieldRange(String fieldName) {
if (DataStream.TIMESTAMP_FIELD_NAME.equals(fieldName)) {
if (dateFieldRangeInfo != null && DataStream.TIMESTAMP_FIELD_NAME.equals(fieldName)) {
return dateFieldRangeInfo.timestampRange();
} else if (IndexMetadata.EVENT_INGESTED_FIELD_NAME.equals(fieldName)) {
} else if (dateFieldRangeInfo != null && IndexMetadata.EVENT_INGESTED_FIELD_NAME.equals(fieldName)) {
return dateFieldRangeInfo.eventIngestedRange();
} else {
return null;
@@ -52,35 +52,37 @@ public CoordinatorRewriteContext getCoordinatorRewriteContext(Index index) {
return null;
}
DateFieldRangeInfo dateFieldRangeInfo = mappingSupplier.apply(index);
// we've now added a coordinator rewrite based on the _tier field so the requirement
// for the timestamps fields to be present is artificial (we could do a coordinator
// rewrite only based on the _tier field) and we might decide to remove this artificial
// limitation to enable coordinator rewrites based on _tier for hot and warm indices
// (currently the _tier coordinator rewrite is only available for mounted and partially mounted
// indices)
if (dateFieldRangeInfo == null) {
return null;
}
DateFieldMapper.DateFieldType timestampFieldType = dateFieldRangeInfo.timestampFieldType();
IndexLongFieldRange timestampRange = indexMetadata.getTimestampRange();
IndexLongFieldRange eventIngestedRange = indexMetadata.getEventIngestedRange();
DateFieldMapper.DateFieldType timestampFieldType = null;
if (dateFieldRangeInfo != null) {
timestampFieldType = dateFieldRangeInfo.timestampFieldType();

if (timestampRange.containsAllShardRanges() == false) {
// if @timestamp range is not present or not ready in cluster state, fallback to using time series range (if present)
timestampRange = indexMetadata.getTimeSeriesTimestampRange(timestampFieldType);
// if timestampRange in the time series is null AND the eventIngestedRange is not ready for use, return null (no coord rewrite)
if (timestampRange == null && eventIngestedRange.containsAllShardRanges() == false) {
return null;
if (timestampRange.containsAllShardRanges() == false) {
// if @timestamp range is not present or not ready in cluster state, fallback to using time series range (if present)
timestampRange = indexMetadata.getTimeSeriesTimestampRange(timestampFieldType);
// if timestampRange in the time series is null AND the eventIngestedRange is not ready for use, return null (no coord
// rewrite)
if (timestampRange == null && eventIngestedRange.containsAllShardRanges() == false) {
return null;
}
}
}

// the DateFieldRangeInfo from the mappingSupplier only has field types, but not ranges
// so create a new object with ranges pulled from cluster state
return new CoordinatorRewriteContext(
parserConfig,
client,
nowInMillis,
new DateFieldRangeInfo(timestampFieldType, timestampRange, dateFieldRangeInfo.eventIngestedFieldType(), eventIngestedRange),
dateFieldRangeInfo == null
? null
// the DateFieldRangeInfo from the mappingSupplier only has field types, but not ranges
// so create a new object with ranges pulled from cluster state
: new DateFieldRangeInfo(
timestampFieldType,
timestampRange,
dateFieldRangeInfo.eventIngestedFieldType(),
eventIngestedRange
),
indexMetadata.getTierPreference().isEmpty() == false ? indexMetadata.getTierPreference().get(0) : ""
);
}
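In effect (a sketch only, not code from this commit; the helper name is hypothetical): a context built without any DateFieldRangeInfo can still resolve the synthetic `_tier` field, which is what lets indices without `@timestamp`/`event.ingested` mapping info participate in the coordinator rewrite.

```java
import org.elasticsearch.cluster.metadata.DataStream;
import org.elasticsearch.index.mapper.MappedFieldType;
import org.elasticsearch.index.query.CoordinatorRewriteContext;

// Hypothetical helper illustrating which fields a "_tier only" rewrite context resolves.
class TierOnlyRewriteSketch {
    static boolean canRewriteOnTierOnly(CoordinatorRewriteContext context) {
        MappedFieldType tier = context.getFieldType(CoordinatorRewriteContext.TIER_FIELD_NAME); // always resolves
        MappedFieldType timestamp = context.getFieldType(DataStream.TIMESTAMP_FIELD_NAME);      // null without mapping info
        return tier != null && timestamp == null;
    }
}
```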
@@ -24,15 +24,18 @@
import org.elasticsearch.cluster.node.DiscoveryNode;
import org.elasticsearch.cluster.node.DiscoveryNodeUtils;
import org.elasticsearch.cluster.routing.GroupShardsIterator;
import org.elasticsearch.cluster.routing.allocation.DataTier;
import org.elasticsearch.common.Strings;
import org.elasticsearch.common.UUIDs;
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.index.Index;
import org.elasticsearch.index.IndexVersion;
import org.elasticsearch.index.mapper.DateFieldMapper;
import org.elasticsearch.index.query.BoolQueryBuilder;
import org.elasticsearch.index.query.CoordinatorRewriteContext;
import org.elasticsearch.index.query.CoordinatorRewriteContextProvider;
import org.elasticsearch.index.query.QueryBuilder;
import org.elasticsearch.index.query.QueryBuilders;
import org.elasticsearch.index.query.RangeQueryBuilder;
import org.elasticsearch.index.query.TermQueryBuilder;
import org.elasticsearch.index.shard.IndexLongFieldRange;
@@ -476,6 +479,98 @@ public void testCanMatchFilteringOnCoordinatorThatCanBeSkippedUsingEventIngested
doCanMatchFilteringOnCoordinatorThatCanBeSkipped(IndexMetadata.EVENT_INGESTED_FIELD_NAME);
}

public void testCanMatchFilteringOnCoordinatorSkipsBasedOnTier() throws Exception {
// we'll test that we're executing _tier coordinator rewrite for indices (data stream backing or regular) without any @timestamp
// or event.ingested fields
// for both data stream backing and regular indices we'll have one index in hot and one in warm. the warm indices will be skipped as
// our queries will filter based on _tier: hot

Map<Index, Settings.Builder> indexNameToSettings = new HashMap<>();
ClusterState state = ClusterState.EMPTY_STATE;

String dataStreamName = randomAlphaOfLengthBetween(10, 20);
Index warmDataStreamIndex = new Index(DataStream.getDefaultBackingIndexName(dataStreamName, 1), UUIDs.base64UUID());
indexNameToSettings.put(
warmDataStreamIndex,
settings(IndexVersion.current()).put(IndexMetadata.SETTING_INDEX_UUID, warmDataStreamIndex.getUUID())
.put(DataTier.TIER_PREFERENCE, "data_warm,data_hot")
);
Index hotDataStreamIndex = new Index(DataStream.getDefaultBackingIndexName(dataStreamName, 2), UUIDs.base64UUID());
indexNameToSettings.put(
hotDataStreamIndex,
settings(IndexVersion.current()).put(IndexMetadata.SETTING_INDEX_UUID, hotDataStreamIndex.getUUID())
.put(DataTier.TIER_PREFERENCE, "data_hot")
);
DataStream dataStream = DataStreamTestHelper.newInstance(dataStreamName, List.of(warmDataStreamIndex, hotDataStreamIndex));

Index warmRegularIndex = new Index("warm-index", UUIDs.base64UUID());
indexNameToSettings.put(
warmRegularIndex,
settings(IndexVersion.current()).put(IndexMetadata.SETTING_INDEX_UUID, warmRegularIndex.getUUID())
.put(DataTier.TIER_PREFERENCE, "data_warm,data_hot")
);
Index hotRegularIndex = new Index("hot-index", UUIDs.base64UUID());
indexNameToSettings.put(
hotRegularIndex,
settings(IndexVersion.current()).put(IndexMetadata.SETTING_INDEX_UUID, hotRegularIndex.getUUID())
.put(DataTier.TIER_PREFERENCE, "data_hot")
);

List<Index> allIndices = new ArrayList<>(4);
allIndices.addAll(dataStream.getIndices());
allIndices.add(warmRegularIndex);
allIndices.add(hotRegularIndex);

List<Index> hotIndices = List.of(hotRegularIndex, hotDataStreamIndex);
List<Index> warmIndices = List.of(warmRegularIndex, warmDataStreamIndex);

for (Index index : allIndices) {
IndexMetadata.Builder indexMetadataBuilder = IndexMetadata.builder(index.getName())
.settings(indexNameToSettings.get(index))
.numberOfShards(1)
.numberOfReplicas(0);
Metadata.Builder metadataBuilder = Metadata.builder(state.metadata()).put(indexMetadataBuilder);
state = ClusterState.builder(state).metadata(metadataBuilder).build();
}

ClusterState finalState = state;
CoordinatorRewriteContextProvider coordinatorRewriteContextProvider = new CoordinatorRewriteContextProvider(
parserConfig(),
mock(Client.class),
System::currentTimeMillis,
() -> finalState,
(index) -> null
);

BoolQueryBuilder boolQueryBuilder = QueryBuilders.boolQuery()
.filter(QueryBuilders.termQuery(CoordinatorRewriteContext.TIER_FIELD_NAME, "data_hot"));

assignShardsAndExecuteCanMatchPhase(
List.of(dataStream),
List.of(hotRegularIndex, warmRegularIndex),
coordinatorRewriteContextProvider,
boolQueryBuilder,
List.of(),
null,
(updatedSearchShardIterators, requests) -> {
var skippedShards = updatedSearchShardIterators.stream().filter(SearchShardIterator::skip).toList();
var nonSkippedShards = updatedSearchShardIterators.stream()
.filter(searchShardIterator -> searchShardIterator.skip() == false)
.toList();

boolean allSkippedShardAreFromWarmIndices = skippedShards.stream()
.allMatch(shardIterator -> warmIndices.contains(shardIterator.shardId().getIndex()));
assertThat(allSkippedShardAreFromWarmIndices, equalTo(true));
boolean allNonSkippedShardAreHotIndices = nonSkippedShards.stream()
.allMatch(shardIterator -> hotIndices.contains(shardIterator.shardId().getIndex()));
assertThat(allNonSkippedShardAreHotIndices, equalTo(true));
boolean allRequestMadeToHotIndices = requests.stream()
.allMatch(request -> hotIndices.contains(request.shardId().getIndex()));
assertThat(allRequestMadeToHotIndices, equalTo(true));
}
);
}

public void doCanMatchFilteringOnCoordinatorThatCanBeSkipped(String timestampField) throws Exception {
Index dataStreamIndex1 = new Index(".ds-mydata0001", UUIDs.base64UUID());
Index dataStreamIndex2 = new Index(".ds-mydata0002", UUIDs.base64UUID());
@@ -21,6 +21,7 @@

import java.util.Collections;

import static org.elasticsearch.index.query.CoordinatorRewriteContext.TIER_FIELD_TYPE;
import static org.hamcrest.Matchers.is;
import static org.hamcrest.Matchers.nullValue;

@@ -86,13 +87,6 @@ public void testGetTierPreference() {

{
// coordinator rewrite context
IndexMetadata metadata = newIndexMeta(
"index",
Settings.builder()
.put(IndexMetadata.SETTING_VERSION_CREATED, IndexVersion.current())
.put(DataTier.TIER_PREFERENCE, "data_cold,data_warm,data_hot")
.build()
);
CoordinatorRewriteContext coordinatorRewriteContext = new CoordinatorRewriteContext(
parserConfig(),
null,
@@ -103,15 +97,9 @@

assertThat(coordinatorRewriteContext.getTierPreference(), is("data_frozen"));
}

{
// coordinator rewrite context empty tier
IndexMetadata metadata = newIndexMeta(
"index",
Settings.builder()
.put(IndexMetadata.SETTING_VERSION_CREATED, IndexVersion.current())
.put(DataTier.TIER_PREFERENCE, "data_cold,data_warm,data_hot")
.build()
);
CoordinatorRewriteContext coordinatorRewriteContext = new CoordinatorRewriteContext(
parserConfig(),
null,
@@ -122,6 +110,25 @@

assertThat(coordinatorRewriteContext.getTierPreference(), is(nullValue()));
}

{
// null date field range info
CoordinatorRewriteContext coordinatorRewriteContext = new CoordinatorRewriteContext(
parserConfig(),
null,
System::currentTimeMillis,
null,
"data_frozen"
);
assertThat(coordinatorRewriteContext.getFieldRange(IndexMetadata.EVENT_INGESTED_FIELD_NAME), is(nullValue()));
assertThat(coordinatorRewriteContext.getFieldRange(IndexMetadata.EVENT_INGESTED_FIELD_NAME), is(nullValue()));
// tier field doesn't have a range
assertThat(coordinatorRewriteContext.getFieldRange(CoordinatorRewriteContext.TIER_FIELD_NAME), is(nullValue()));
assertThat(coordinatorRewriteContext.getFieldType(IndexMetadata.EVENT_INGESTED_FIELD_NAME), is(nullValue()));
assertThat(coordinatorRewriteContext.getFieldType(IndexMetadata.EVENT_INGESTED_FIELD_NAME), is(nullValue()));
// _tier field type should still work even without the data field info
assertThat(coordinatorRewriteContext.getFieldType(CoordinatorRewriteContext.TIER_FIELD_NAME), is(TIER_FIELD_TYPE));
}
}

public static IndexMetadata newIndexMeta(String name, Settings indexSettings) {