From 12def8a0f1a754b0f456948ef4c90edb9292d2fb Mon Sep 17 00:00:00 2001
From: Chung En Lee
Date: Sun, 12 Jan 2025 15:24:50 +0800
Subject: [PATCH] HDDS-11326. Speed up TestBlockOutputStreamWithFailures (#7374)

---
 .../client/rpc/TestBlockOutputStream.java     |  7 ++++-
 .../TestBlockOutputStreamWithFailures.java    | 31 ++++++++++++-------
 2 files changed, 25 insertions(+), 13 deletions(-)

diff --git a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/client/rpc/TestBlockOutputStream.java b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/client/rpc/TestBlockOutputStream.java
index 63692c0dfc7..30cfb15a5dd 100644
--- a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/client/rpc/TestBlockOutputStream.java
+++ b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/client/rpc/TestBlockOutputStream.java
@@ -84,6 +84,11 @@ class TestBlockOutputStream {
 
   static MiniOzoneCluster createCluster() throws IOException,
       InterruptedException, TimeoutException {
+    return createCluster(5);
+  }
+
+  static MiniOzoneCluster createCluster(int datanodes) throws IOException,
+      InterruptedException, TimeoutException {
     OzoneConfiguration conf = new OzoneConfiguration();
     OzoneClientConfig clientConfig = conf.getObject(OzoneClientConfig.class);
     clientConfig.setChecksumType(ChecksumType.NONE);
@@ -126,7 +131,7 @@ static MiniOzoneCluster createCluster() throws IOException,
         .applyTo(conf);
 
     MiniOzoneCluster cluster = MiniOzoneCluster.newBuilder(conf)
-        .setNumDatanodes(5)
+        .setNumDatanodes(datanodes)
         .build();
 
     cluster.waitForClusterToBeReady();
diff --git a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/client/rpc/TestBlockOutputStreamWithFailures.java b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/client/rpc/TestBlockOutputStreamWithFailures.java
index 010bd93834b..e6a6b672229 100644
--- a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/client/rpc/TestBlockOutputStreamWithFailures.java
+++ b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/client/rpc/TestBlockOutputStreamWithFailures.java
@@ -19,17 +19,18 @@
 import org.apache.commons.lang3.ArrayUtils;
 import org.apache.commons.lang3.RandomUtils;
 import org.apache.hadoop.hdds.client.ReplicationFactor;
+import org.apache.hadoop.hdds.protocol.DatanodeDetails;
 import org.apache.hadoop.hdds.scm.OzoneClientConfig;
 import org.apache.hadoop.hdds.scm.XceiverClientRatis;
 import org.apache.hadoop.hdds.scm.container.common.helpers.ContainerNotOpenException;
 import org.apache.hadoop.hdds.scm.pipeline.Pipeline;
 import org.apache.hadoop.hdds.scm.storage.RatisBlockOutputStream;
+import org.apache.hadoop.ozone.HddsDatanodeService;
 import org.apache.hadoop.ozone.MiniOzoneCluster;
 import org.apache.hadoop.ozone.client.OzoneClient;
 import org.apache.hadoop.ozone.client.io.KeyOutputStream;
 import org.apache.hadoop.ozone.client.io.OzoneOutputStream;
 import org.apache.hadoop.ozone.container.TestHelper;
-import org.apache.ozone.test.tag.Flaky;
 
 import static org.apache.hadoop.hdds.scm.client.HddsClientUtils.checkForException;
 import static org.apache.hadoop.ozone.client.rpc.TestBlockOutputStream.BLOCK_SIZE;
@@ -49,16 +50,18 @@
 import static org.junit.jupiter.api.Assertions.assertInstanceOf;
 import static org.junit.jupiter.api.Assertions.assertTrue;
 
+import org.apache.ozone.test.tag.Flaky;
 import org.apache.ratis.protocol.exceptions.GroupMismatchException;
 import org.apache.ratis.protocol.exceptions.RaftRetryFailureException;
-import org.junit.jupiter.api.AfterEach;
-import org.junit.jupiter.api.BeforeEach;
+import org.junit.jupiter.api.AfterAll;
+import org.junit.jupiter.api.BeforeAll;
 import org.junit.jupiter.api.TestInstance;
 import org.junit.jupiter.api.Timeout;
 import org.junit.jupiter.params.ParameterizedTest;
 import org.junit.jupiter.params.provider.Arguments;
 import org.junit.jupiter.params.provider.MethodSource;
 
+import java.io.IOException;
 import java.util.stream.Stream;
 
 /**
@@ -66,16 +69,17 @@
  */
 @TestInstance(TestInstance.Lifecycle.PER_CLASS)
 @Timeout(300)
+@Flaky("HDDS-11849")
 class TestBlockOutputStreamWithFailures {
 
   private MiniOzoneCluster cluster;
 
-  @BeforeEach
+  @BeforeAll
   void init() throws Exception {
-    cluster = createCluster();
+    cluster = createCluster(25);
   }
 
-  @AfterEach
+  @AfterAll
   void shutdown() {
     if (cluster != null) {
       cluster.shutdown();
@@ -187,7 +191,6 @@ private void testWatchForCommitWithCloseContainerException(OzoneClient client)
 
   @ParameterizedTest
   @MethodSource("clientParameters")
-  @Flaky("HDDS-6113")
   void testWatchForCommitDatanodeFailure(boolean flushDelay, boolean enablePiggybacking) throws Exception {
     OzoneClientConfig config = newClientConfig(cluster.getConf(), flushDelay, enablePiggybacking);
     try (OzoneClient client = newClient(cluster.getConf(), config)) {
@@ -246,7 +249,7 @@ void testWatchForCommitDatanodeFailure(boolean flushDelay, boolean enablePiggyba
           (XceiverClientRatis) blockOutputStream.getXceiverClient();
       assertEquals(3, raftClient.getCommitInfoMap().size());
       Pipeline pipeline = raftClient.getPipeline();
-      cluster.shutdownHddsDatanode(pipeline.getNodes().get(0));
+      stopAndRemove(pipeline.getNodes().get(0));
 
       // again write data with more than max buffer limit. This will call
       // watchForCommit again. Since the commit will happen 2 way, the
@@ -272,7 +275,6 @@ void testWatchForCommitDatanodeFailure(boolean flushDelay, boolean enablePiggyba
 
   @ParameterizedTest
   @MethodSource("clientParameters")
-  @Flaky("HDDS-11849")
   void test2DatanodesFailure(boolean flushDelay, boolean enablePiggybacking) throws Exception {
     OzoneClientConfig config = newClientConfig(cluster.getConf(), flushDelay, enablePiggybacking);
     try (OzoneClient client = newClient(cluster.getConf(), config)) {
@@ -329,8 +331,8 @@ void test2DatanodesFailure(boolean flushDelay, boolean enablePiggybacking) throw
           (XceiverClientRatis) blockOutputStream.getXceiverClient();
       assertEquals(3, raftClient.getCommitInfoMap().size());
       Pipeline pipeline = raftClient.getPipeline();
-      cluster.shutdownHddsDatanode(pipeline.getNodes().get(0));
-      cluster.shutdownHddsDatanode(pipeline.getNodes().get(1));
+      stopAndRemove(pipeline.getNodes().get(0));
+      stopAndRemove(pipeline.getNodes().get(1));
       // again write data with more than max buffer limit. This will call
       // watchForCommit again. Since the commit will happen 2 way, the
       // commitInfoMap will get updated for servers which are alive
@@ -579,7 +581,6 @@ private void testWatchForCommitWithSingleNodeRatis(OzoneClient client)
 
   @ParameterizedTest
   @MethodSource("clientParameters")
-  @Flaky("HDDS-6113")
   void testDatanodeFailureWithSingleNode(boolean flushDelay, boolean enablePiggybacking) throws Exception {
     OzoneClientConfig config = newClientConfig(cluster.getConf(), flushDelay, enablePiggybacking);
     try (OzoneClient client = newClient(cluster.getConf(), config)) {
@@ -761,4 +762,10 @@ void testDatanodeFailureWithPreAllocation(boolean flushDelay, boolean enablePigg
     }
   }
 
+  private void stopAndRemove(DatanodeDetails dn) throws IOException {
+    HddsDatanodeService datanode = cluster.getHddsDatanodes().remove(cluster.getHddsDatanodeIndex(dn));
+    datanode.stop();
+    datanode.join();
+  }
+
 }
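Note on the change (not part of the patch): the speedup comes from building the MiniOzoneCluster once per test class instead of once per test method. With @TestInstance(TestInstance.Lifecycle.PER_CLASS), JUnit 5 allows @BeforeAll/@AfterAll on non-static methods, so a single cluster (sized up to 25 datanodes, presumably so that node-killing tests leave enough healthy nodes for the remaining tests) is shared by all parameterized tests, and the new stopAndRemove helper drops the datanodes a failure test kills from the shared pool so later tests do not pick them up again. Below is a minimal, self-contained sketch of that lifecycle pattern, assuming only JUnit 5 on the classpath; ExpensiveCluster is a hypothetical stand-in for MiniOzoneCluster, not an Ozone class.

    import org.junit.jupiter.api.AfterAll;
    import org.junit.jupiter.api.BeforeAll;
    import org.junit.jupiter.api.Test;
    import org.junit.jupiter.api.TestInstance;

    // Sketch only: one expensive fixture shared by every test in the class.
    @TestInstance(TestInstance.Lifecycle.PER_CLASS)
    class SharedClusterLifecycleTest {

      private ExpensiveCluster cluster;

      // PER_CLASS lifecycle lets @BeforeAll/@AfterAll be instance methods.
      @BeforeAll
      void init() {
        // Oversize the pool so tests that consume nodes leave enough for the rest.
        cluster = new ExpensiveCluster(25);
      }

      @AfterAll
      void shutdown() {
        if (cluster != null) {
          cluster.shutdown();
        }
      }

      @Test
      void reusesSharedCluster() {
        // Each test reuses the already-started cluster instead of paying startup cost.
        cluster.use();
      }

      /** Hypothetical placeholder for an expensive shared resource such as MiniOzoneCluster. */
      static final class ExpensiveCluster {
        ExpensiveCluster(int nodes) {
          // a real fixture would start 'nodes' workers here
        }
        void use() {
        }
        void shutdown() {
        }
      }
    }

The usual trade-off of a shared fixture applies: tests must not rely on per-test cluster state, which is why failed datanodes are removed from the shared list rather than merely shut down in place.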