Fix SearchTimeoutIT
Two of the timeout tests have been muted for several months. The reason is that we
tightened the assertions to check that partial results are returned on timeout, but there
were edge cases in which partial results were not actually returned.

The edge case is a typical timing issue: the initial timeout check in CancellableBulkScorer
can itself trigger the timeout before any document has been collected.
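
For illustration only (a simplified, hypothetical sketch, not the actual CancellableBulkScorer;
class and method names are invented): a collection loop that checks a deadline before each
document shows how a timeout that has already elapsed at the very first check leaves zero
documents collected, and therefore no partial results for the test assertions to find:

    import java.util.ArrayList;
    import java.util.List;

    public class TimeoutBeforeFirstDocSketch {

        // Collect docs until the deadline passes; the check runs before each doc,
        // so a deadline that is already in the past yields an empty result.
        static List<Integer> collectUntilDeadline(List<Integer> docs, long deadlineNanos) {
            List<Integer> collected = new ArrayList<>();
            for (Integer doc : docs) {
                if (System.nanoTime() >= deadlineNanos) {
                    break; // timed out before this doc was collected
                }
                collected.add(doc);
            }
            return collected;
        }

        public static void main(String[] args) {
            List<Integer> docs = List.of(1, 2, 3, 4, 5);
            // Deadline already in the past: the first check trips the timeout and
            // nothing is collected, which is the edge case described above.
            List<Integer> partial = collectUntilDeadline(docs, System.nanoTime() - 1);
            System.out.println("collected " + partial.size() + " docs"); // prints: collected 0 docs
        }
    }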

I made several adjustments to the test to make it more robust:
- use indexRandom to index documents, which speeds it up
- share indexing across test methods, so that it happens once at the suite level
- raise a single timeout, rather than one per visited document, which caused an unnecessary slowdown
- make sure that one document is always visited before the timeout, so that partial results are
always guaranteed to be returned (see the sketch below)
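
The last point is what the ScriptedTimeoutPlugin change in the diff below implements: the mock
script now sleeps during a single execution instead of on every visited document. A stripped-down,
standalone sketch of that gating pattern (class and method names here are illustrative, not from
the repository; the real change is in the diff below):

    import java.util.concurrent.atomic.AtomicInteger;

    public class SleepOnceSketch {

        private static final AtomicInteger scriptExecutions = new AtomicInteger(0);

        // Mimics the gist of the mock script: a single execution sleeps long enough to trip
        // the search timeout; every other execution returns immediately, so the test no
        // longer pays a 500ms penalty per visited document.
        static boolean executeScript() {
            if (scriptExecutions.incrementAndGet() == 1) {
                try {
                    Thread.sleep(500);
                } catch (InterruptedException e) {
                    Thread.currentThread().interrupt();
                    throw new RuntimeException(e);
                }
            }
            return true;
        }

        public static void main(String[] args) {
            for (int i = 1; i <= 3; i++) {
                long start = System.nanoTime();
                executeScript();
                System.out.printf("execution %d took %d ms%n", i, (System.nanoTime() - start) / 1_000_000);
            }
        }
    }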

Closes elastic#98369
Closes elastic#98053
javanna committed Jan 17, 2025
1 parent cbb7c24 commit ff43d5a
Showing 1 changed file with 57 additions and 43 deletions.
@@ -9,8 +9,9 @@
 
 package org.elasticsearch.search;
 
+import org.apache.lucene.util.ThreadInterruptedException;
 import org.elasticsearch.ElasticsearchException;
-import org.elasticsearch.action.search.SearchResponse;
+import org.elasticsearch.action.search.SearchRequestBuilder;
 import org.elasticsearch.common.settings.Settings;
 import org.elasticsearch.core.TimeValue;
 import org.elasticsearch.plugins.Plugin;
@@ -20,22 +21,26 @@
 import org.elasticsearch.search.aggregations.bucket.terms.StringTerms;
 import org.elasticsearch.search.aggregations.bucket.terms.TermsAggregationBuilder;
 import org.elasticsearch.test.ESIntegTestCase;
+import org.elasticsearch.test.hamcrest.ElasticsearchAssertions;
 
 import java.util.Collection;
 import java.util.Collections;
 import java.util.Map;
 import java.util.concurrent.TimeUnit;
+import java.util.concurrent.atomic.AtomicInteger;
 import java.util.function.Function;
 
-import static org.elasticsearch.action.support.WriteRequest.RefreshPolicy.IMMEDIATE;
 import static org.elasticsearch.index.query.QueryBuilders.scriptQuery;
 import static org.elasticsearch.search.SearchTimeoutIT.ScriptedTimeoutPlugin.SCRIPT_NAME;
 import static org.hamcrest.Matchers.equalTo;
 import static org.hamcrest.Matchers.greaterThan;
 
-@ESIntegTestCase.ClusterScope(scope = ESIntegTestCase.Scope.SUITE)
+@ESIntegTestCase.SuiteScopeTestCase
 public class SearchTimeoutIT extends ESIntegTestCase {
 
+    private static final AtomicInteger scriptExecutions = new AtomicInteger(0);
+
     @Override
     protected Collection<Class<? extends Plugin>> nodePlugins() {
         return Collections.singleton(ScriptedTimeoutPlugin.class);
@@ -46,60 +51,63 @@ protected Settings nodeSettings(int nodeOrdinal, Settings otherSettings) {
         return Settings.builder().put(super.nodeSettings(nodeOrdinal, otherSettings)).build();
     }
 
-    private void indexDocs() {
-        for (int i = 0; i < 32; i++) {
-            prepareIndex("test").setId(Integer.toString(i)).setSource("field", "value").get();
-        }
-        refresh("test");
+    @Override
+    protected void setupSuiteScopeCluster() throws Exception {
+        super.setupSuiteScopeCluster();
+        indexRandom(true, "test", randomIntBetween(5, 30));
     }
 
+    @Override
+    public void tearDown() throws Exception {
+        super.tearDown();
+        scriptExecutions.set(0);
+    }
+
-    @AwaitsFix(bugUrl = "https://github.com/elastic/elasticsearch/issues/98369")
     public void testTopHitsTimeout() {
-        indexDocs();
-        SearchResponse searchResponse = prepareSearch("test").setTimeout(new TimeValue(10, TimeUnit.MILLISECONDS))
-            .setQuery(scriptQuery(new Script(ScriptType.INLINE, "mockscript", SCRIPT_NAME, Collections.emptyMap())))
-            .get();
-        assertThat(searchResponse.isTimedOut(), equalTo(true));
-        assertEquals(0, searchResponse.getShardFailures().length);
-        assertEquals(0, searchResponse.getFailedShards());
-        assertThat(searchResponse.getSuccessfulShards(), greaterThan(0));
-        assertEquals(searchResponse.getSuccessfulShards(), searchResponse.getTotalShards());
-        assertThat(searchResponse.getHits().getTotalHits().value(), greaterThan(0L));
-        assertThat(searchResponse.getHits().getHits().length, greaterThan(0));
+        SearchRequestBuilder searchRequestBuilder = prepareSearch("test").setTimeout(new TimeValue(100, TimeUnit.MILLISECONDS))
+            .setQuery(scriptQuery(new Script(ScriptType.INLINE, "mockscript", SCRIPT_NAME, Collections.emptyMap())));
+        ElasticsearchAssertions.assertResponse(searchRequestBuilder, searchResponse -> {
+            assertThat(searchResponse.isTimedOut(), equalTo(true));
+            assertEquals(0, searchResponse.getShardFailures().length);
+            assertEquals(0, searchResponse.getFailedShards());
+            assertThat(searchResponse.getSuccessfulShards(), greaterThan(0));
+            assertEquals(searchResponse.getSuccessfulShards(), searchResponse.getTotalShards());
+            assertThat(searchResponse.getHits().getTotalHits().value(), greaterThan(0L));
+            assertThat(searchResponse.getHits().getHits().length, greaterThan(0));
+        });
     }
 
-    @AwaitsFix(bugUrl = "https://github.com/elastic/elasticsearch/issues/98053")
     public void testAggsTimeout() {
-        indexDocs();
-        SearchResponse searchResponse = prepareSearch("test").setTimeout(new TimeValue(10, TimeUnit.MILLISECONDS))
+        indexRandom(true, "test", randomIntBetween(5, 30));
+        SearchRequestBuilder searchRequestBuilder = prepareSearch("test").setTimeout(new TimeValue(100, TimeUnit.MILLISECONDS))
             .setSize(0)
             .setQuery(scriptQuery(new Script(ScriptType.INLINE, "mockscript", SCRIPT_NAME, Collections.emptyMap())))
-            .addAggregation(new TermsAggregationBuilder("terms").field("field.keyword"))
-            .get();
-        assertThat(searchResponse.isTimedOut(), equalTo(true));
-        assertEquals(0, searchResponse.getShardFailures().length);
-        assertEquals(0, searchResponse.getFailedShards());
-        assertThat(searchResponse.getSuccessfulShards(), greaterThan(0));
-        assertEquals(searchResponse.getSuccessfulShards(), searchResponse.getTotalShards());
-        assertThat(searchResponse.getHits().getTotalHits().value(), greaterThan(0L));
-        assertEquals(searchResponse.getHits().getHits().length, 0);
-        StringTerms terms = searchResponse.getAggregations().get("terms");
-        assertEquals(1, terms.getBuckets().size());
-        StringTerms.Bucket bucket = terms.getBuckets().get(0);
-        assertEquals("value", bucket.getKeyAsString());
-        assertThat(bucket.getDocCount(), greaterThan(0L));
+            .addAggregation(new TermsAggregationBuilder("terms").field("field.keyword"));
+        ElasticsearchAssertions.assertResponse(searchRequestBuilder, searchResponse -> {
+            assertThat(searchResponse.isTimedOut(), equalTo(true));
+            assertEquals(0, searchResponse.getShardFailures().length);
+            assertEquals(0, searchResponse.getFailedShards());
+            assertThat(searchResponse.getSuccessfulShards(), greaterThan(0));
+            assertEquals(searchResponse.getSuccessfulShards(), searchResponse.getTotalShards());
+            assertThat(searchResponse.getHits().getTotalHits().value(), greaterThan(0L));
+            assertEquals(0, searchResponse.getHits().getHits().length);
+            StringTerms terms = searchResponse.getAggregations().get("terms");
+            assertEquals(1, terms.getBuckets().size());
+            StringTerms.Bucket bucket = terms.getBuckets().get(0);
+            assertEquals("value", bucket.getKeyAsString());
+            assertThat(bucket.getDocCount(), greaterThan(0L));
+        });
     }
 
-    public void testPartialResultsIntolerantTimeout() throws Exception {
-        prepareIndex("test").setId("1").setSource("field", "value").setRefreshPolicy(IMMEDIATE).get();
-
+    public void testPartialResultsIntolerantTimeout() {
         ElasticsearchException ex = expectThrows(
             ElasticsearchException.class,
             prepareSearch("test").setTimeout(new TimeValue(10, TimeUnit.MILLISECONDS))
                 .setQuery(scriptQuery(new Script(ScriptType.INLINE, "mockscript", SCRIPT_NAME, Collections.emptyMap())))
                 .setAllowPartialSearchResults(false) // this line causes timeouts to report failures
         );
         assertTrue(ex.toString().contains("Time exceeded"));
         assertEquals(504, ex.status().getStatus());
     }
 
     public static class ScriptedTimeoutPlugin extends MockScriptPlugin {
@@ -108,10 +116,16 @@ public static class ScriptedTimeoutPlugin extends MockScriptPlugin {
         @Override
         public Map<String, Function<Map<String, Object>, Object>> pluginScripts() {
             return Collections.singletonMap(SCRIPT_NAME, params -> {
-                try {
-                    Thread.sleep(500);
-                } catch (InterruptedException e) {
-                    throw new RuntimeException(e);
+                // sleep only once per test, but only after executing the script once without sleeping.
+                // This ensures that one document is always returned before the timeout happens.
+                // Also, don't sleep any further to avoid slowing down the test excessively.
+                // A timeout on a specific slice of a single shard is enough.
+                if (scriptExecutions.incrementAndGet() == 1) {
+                    try {
+                        Thread.sleep(500);
+                    } catch (InterruptedException e) {
+                        throw new ThreadInterruptedException(e);
+                    }
                 }
                 return true;
             });
