Skip to content

Commit

Permalink
Enable failure detector in tests
Browse files Browse the repository at this point in the history
The failure detector remains disabled for the raptor tests, because they
currently fail with it enabled, and for the jmx tests, because they rely on
having a consistent number of nodes.
  • Loading branch information
rschlussel committed Feb 7, 2020
1 parent b937625 commit 3eef1b0
Show file tree
Hide file tree
Showing 6 changed files with 37 additions and 14 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -103,6 +103,9 @@ private static QueryRunner createQueryRunner()
{
DistributedQueryRunner queryRunner = HiveQueryRunner.createQueryRunner(getTables(),
ImmutableMap.of("experimental.pushdown-subfields-enabled", "true"),
// TODO: enable failure detector. Currently this test has a ton of major GC activity on travis,
// and the failure detector may make the test run longer
ImmutableMap.of("failure-detector.enabled", "false"),
"sql-standard",
ImmutableMap.of("hive.pushdown-filter-enabled", "true"),
Optional.empty());
Expand Down Expand Up @@ -173,8 +176,8 @@ public void testLegacyUnnest()

assertQuery(legacyUnnest, "SELECT orderkey, date.day FROM lineitem_ex CROSS JOIN UNNEST(dates) t(date)",
"SELECT orderkey, day(shipdate) FROM lineitem WHERE orderkey % 31 <> 0 UNION ALL " +
"SELECT orderkey, day(commitdate) FROM lineitem WHERE orderkey % 31 <> 0 UNION ALL " +
"SELECT orderkey, day(receiptdate) FROM lineitem WHERE orderkey % 31 <> 0");
"SELECT orderkey, day(commitdate) FROM lineitem WHERE orderkey % 31 <> 0 UNION ALL " +
"SELECT orderkey, day(receiptdate) FROM lineitem WHERE orderkey % 31 <> 0");
}

@Test
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -94,8 +94,9 @@ public void setUp()
// this effectively disables the retries
"query.remote-task.max-error-duration", "1s",
// allow 2 out of 4 tasks to fail
"max-failed-task-percentage", "0.6"),

"max-failed-task-percentage", "0.6",
// turn off the failure detector since with the shortened timeouts, it becomes too prone to failure
"failure-detector.enabled", "false"),
Optional.empty());
executor = listeningDecorator(newCachedThreadPool());
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@

import com.facebook.presto.Session;
import com.facebook.presto.tests.DistributedQueryRunner;
import com.google.common.collect.ImmutableMap;

import static com.facebook.airlift.testing.Closeables.closeAllSuppress;
import static com.facebook.presto.connector.jmx.JmxMetadata.JMX_SCHEMA_NAME;
Expand All @@ -30,7 +31,11 @@ public static DistributedQueryRunner createJmxQueryRunner()
{
DistributedQueryRunner queryRunner = null;
try {
queryRunner = new DistributedQueryRunner(createSession(), 3);
queryRunner = DistributedQueryRunner.builder(createSession())
.setNodeCount(3)
// disable failure-detector to prevent flaky tests since the jmx tests rely on the number of nodes being consistent
.setCoordinatorProperties(ImmutableMap.of("failure-detector.enabled", "false"))
.build();

queryRunner.installPlugin(new JmxPlugin());
queryRunner.createCatalog("jmx", "jmx");
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -232,11 +232,6 @@ public TestingPrestoServer(
.put("task.max-worker-threads", "4")
.put("exchange.client-threads", "4");

if (coordinator) {
// TODO: enable failure detector
serverProperties.put("failure-detector.enabled", "false");
}

ImmutableList.Builder<Module> modules = ImmutableList.<Module>builder()
.add(new TestingNodeModule(Optional.ofNullable(environment)))
.add(new TestingHttpServerModule(parseInt(coordinator ? coordinatorPort : "0")))
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -57,7 +57,12 @@ public static DistributedQueryRunner createRaptorQueryRunner(
Map<String, String> extraRaptorProperties)
throws Exception
{
DistributedQueryRunner queryRunner = new DistributedQueryRunner(createSession("tpch"), 2, extraProperties);
DistributedQueryRunner queryRunner = DistributedQueryRunner.builder(createSession("tpch"))
.setNodeCount(2)
// TODO: enable failure-detector for raptor. Currently enabling the failure detector causes failures.
.setCoordinatorProperties(ImmutableMap.of("failure-detector.enabled", "false"))
.setExtraProperties(extraProperties)
.build();

queryRunner.installPlugin(new TpchPlugin());
queryRunner.createCatalog("tpch", "tpch");
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@
import static org.testng.Assert.fail;

// run single threaded to avoid creating multiple query runners at once
// failure detector is disabled in these tests to prevent flakiness since the tests assert a specific number of workers are present
@Test(singleThreaded = true)
public class TestMinWorkerRequirement
{
Expand All @@ -29,7 +30,10 @@ public void testInsufficientInitialWorkerNodes()
throws Exception
{
try (DistributedQueryRunner queryRunner = TpchQueryRunnerBuilder.builder()
.setSingleCoordinatorProperty("query-manager.initialization-required-workers", "5")
.setCoordinatorProperties(ImmutableMap.<String, String>builder()
.put("query-manager.initialization-required-workers", "5")
.put("failure-detector.enabled", "false")
.build())
.setNodeCount(4)
.build()) {
queryRunner.execute("SELECT 1");
Expand All @@ -43,7 +47,10 @@ public void testInsufficientInitialWorkerNodesWithCoordinatorExcluded()
{
try (DistributedQueryRunner queryRunner = TpchQueryRunnerBuilder.builder()
.setSingleExtraProperty("node-scheduler.include-coordinator", "false")
.setSingleCoordinatorProperty("query-manager.initialization-required-workers", "4")
.setCoordinatorProperties(ImmutableMap.<String, String>builder()
.put("query-manager.initialization-required-workers", "4")
.put("failure-detector.enabled", "false")
.build())
.setNodeCount(4)
.build()) {
queryRunner.execute("SELECT 1");
Expand All @@ -56,7 +63,10 @@ public void testSufficientInitialWorkerNodes()
throws Exception
{
try (DistributedQueryRunner queryRunner = TpchQueryRunnerBuilder.builder()
.setSingleCoordinatorProperty("query-manager.initialization-required-workers", "4")
.setCoordinatorProperties(ImmutableMap.<String, String>builder()
.put("query-manager.initialization-required-workers", "4")
.put("failure-detector.enabled", "false")
.build())
.setNodeCount(4)
.build()) {
queryRunner.execute("SELECT 1");
Expand All @@ -77,6 +87,7 @@ public void testInitializationTimeout()
.setCoordinatorProperties(ImmutableMap.<String, String>builder()
.put("query-manager.initialization-required-workers", "5")
.put("query-manager.initialization-timeout", "1ns")
.put("failure-detector.enabled", "false")
.build())
.setNodeCount(4)
.build()) {
Expand All @@ -93,6 +104,7 @@ public void testInsufficientWorkerNodes()
.setCoordinatorProperties(ImmutableMap.<String, String>builder()
.put("query-manager.required-workers", "5")
.put("query-manager.required-workers-max-wait", "1ns")
.put("failure-detector.enabled", "false")
.build())
.setNodeCount(4)
.build()) {
Expand All @@ -110,6 +122,7 @@ public void testInsufficientWorkerNodesWithCoordinatorExcluded()
.put("node-scheduler.include-coordinator", "false")
.put("query-manager.required-workers", "4")
.put("query-manager.required-workers-max-wait", "1ns")
.put("failure-detector.enabled", "false")
.build())
.setNodeCount(4)
.build()) {
Expand All @@ -126,6 +139,7 @@ public void testInsufficientWorkerNodesAfterDrop()
.setCoordinatorProperties(ImmutableMap.<String, String>builder()
.put("query-manager.required-workers", "4")
.put("query-manager.required-workers-max-wait", "1ns")
.put("failure-detector.enabled", "false")
.build())
.setNodeCount(4)
.build()) {
Expand Down

0 comments on commit 3eef1b0

Please sign in to comment.