Skip to content

Commit

Permalink
HDDS-11326. Speed up TestBlockOutputStreamWithFailures (apache#7374)
Browse files Browse the repository at this point in the history
  • Loading branch information
chungen0126 authored Jan 12, 2025
1 parent b89b6e0 commit 12def8a
Show file tree
Hide file tree
Showing 2 changed files with 25 additions and 13 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -84,6 +84,11 @@ class TestBlockOutputStream {

// Convenience overload: builds a MiniOzoneCluster with the default of 5 datanodes
// by delegating to createCluster(int).
static MiniOzoneCluster createCluster() throws IOException,
InterruptedException, TimeoutException {
return createCluster(5);
}

static MiniOzoneCluster createCluster(int datanodes) throws IOException,
InterruptedException, TimeoutException {
OzoneConfiguration conf = new OzoneConfiguration();
OzoneClientConfig clientConfig = conf.getObject(OzoneClientConfig.class);
clientConfig.setChecksumType(ChecksumType.NONE);
Expand Down Expand Up @@ -126,7 +131,7 @@ static MiniOzoneCluster createCluster() throws IOException,
.applyTo(conf);

MiniOzoneCluster cluster = MiniOzoneCluster.newBuilder(conf)
.setNumDatanodes(5)
.setNumDatanodes(datanodes)
.build();
cluster.waitForClusterToBeReady();

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -19,17 +19,18 @@
import org.apache.commons.lang3.ArrayUtils;
import org.apache.commons.lang3.RandomUtils;
import org.apache.hadoop.hdds.client.ReplicationFactor;
import org.apache.hadoop.hdds.protocol.DatanodeDetails;
import org.apache.hadoop.hdds.scm.OzoneClientConfig;
import org.apache.hadoop.hdds.scm.XceiverClientRatis;
import org.apache.hadoop.hdds.scm.container.common.helpers.ContainerNotOpenException;
import org.apache.hadoop.hdds.scm.pipeline.Pipeline;
import org.apache.hadoop.hdds.scm.storage.RatisBlockOutputStream;
import org.apache.hadoop.ozone.HddsDatanodeService;
import org.apache.hadoop.ozone.MiniOzoneCluster;
import org.apache.hadoop.ozone.client.OzoneClient;
import org.apache.hadoop.ozone.client.io.KeyOutputStream;
import org.apache.hadoop.ozone.client.io.OzoneOutputStream;
import org.apache.hadoop.ozone.container.TestHelper;
import org.apache.ozone.test.tag.Flaky;

import static org.apache.hadoop.hdds.scm.client.HddsClientUtils.checkForException;
import static org.apache.hadoop.ozone.client.rpc.TestBlockOutputStream.BLOCK_SIZE;
Expand All @@ -49,33 +50,36 @@
import static org.junit.jupiter.api.Assertions.assertInstanceOf;
import static org.junit.jupiter.api.Assertions.assertTrue;

import org.apache.ozone.test.tag.Flaky;
import org.apache.ratis.protocol.exceptions.GroupMismatchException;
import org.apache.ratis.protocol.exceptions.RaftRetryFailureException;
import org.junit.jupiter.api.AfterEach;
import org.junit.jupiter.api.BeforeEach;
import org.junit.jupiter.api.AfterAll;
import org.junit.jupiter.api.BeforeAll;
import org.junit.jupiter.api.TestInstance;
import org.junit.jupiter.api.Timeout;
import org.junit.jupiter.params.ParameterizedTest;
import org.junit.jupiter.params.provider.Arguments;
import org.junit.jupiter.params.provider.MethodSource;

import java.io.IOException;
import java.util.stream.Stream;

/**
* Tests failure detection and handling in BlockOutputStream Class.
*/
@TestInstance(TestInstance.Lifecycle.PER_CLASS)
@Timeout(300)
@Flaky("HDDS-11849")
class TestBlockOutputStreamWithFailures {

private MiniOzoneCluster cluster;

@BeforeEach
@BeforeAll
void init() throws Exception {
cluster = createCluster();
cluster = createCluster(25);
}

@AfterEach
@AfterAll
void shutdown() {
if (cluster != null) {
cluster.shutdown();
Expand Down Expand Up @@ -187,7 +191,6 @@ private void testWatchForCommitWithCloseContainerException(OzoneClient client)

@ParameterizedTest
@MethodSource("clientParameters")
@Flaky("HDDS-6113")
void testWatchForCommitDatanodeFailure(boolean flushDelay, boolean enablePiggybacking) throws Exception {
OzoneClientConfig config = newClientConfig(cluster.getConf(), flushDelay, enablePiggybacking);
try (OzoneClient client = newClient(cluster.getConf(), config)) {
Expand Down Expand Up @@ -246,7 +249,7 @@ void testWatchForCommitDatanodeFailure(boolean flushDelay, boolean enablePiggyba
(XceiverClientRatis) blockOutputStream.getXceiverClient();
assertEquals(3, raftClient.getCommitInfoMap().size());
Pipeline pipeline = raftClient.getPipeline();
cluster.shutdownHddsDatanode(pipeline.getNodes().get(0));
stopAndRemove(pipeline.getNodes().get(0));

// again write data with more than max buffer limit. This will call
// watchForCommit again. Since the commit will happen 2 way, the
Expand All @@ -272,7 +275,6 @@ void testWatchForCommitDatanodeFailure(boolean flushDelay, boolean enablePiggyba

@ParameterizedTest
@MethodSource("clientParameters")
@Flaky("HDDS-11849")
void test2DatanodesFailure(boolean flushDelay, boolean enablePiggybacking) throws Exception {
OzoneClientConfig config = newClientConfig(cluster.getConf(), flushDelay, enablePiggybacking);
try (OzoneClient client = newClient(cluster.getConf(), config)) {
Expand Down Expand Up @@ -329,8 +331,8 @@ void test2DatanodesFailure(boolean flushDelay, boolean enablePiggybacking) throw
(XceiverClientRatis) blockOutputStream.getXceiverClient();
assertEquals(3, raftClient.getCommitInfoMap().size());
Pipeline pipeline = raftClient.getPipeline();
cluster.shutdownHddsDatanode(pipeline.getNodes().get(0));
cluster.shutdownHddsDatanode(pipeline.getNodes().get(1));
stopAndRemove(pipeline.getNodes().get(0));
stopAndRemove(pipeline.getNodes().get(1));
// again write data with more than max buffer limit. This will call
// watchForCommit again. Since the commit will happen 2 way, the
// commitInfoMap will get updated for servers which are alive
Expand Down Expand Up @@ -579,7 +581,6 @@ private void testWatchForCommitWithSingleNodeRatis(OzoneClient client)

@ParameterizedTest
@MethodSource("clientParameters")
@Flaky("HDDS-6113")
void testDatanodeFailureWithSingleNode(boolean flushDelay, boolean enablePiggybacking) throws Exception {
OzoneClientConfig config = newClientConfig(cluster.getConf(), flushDelay, enablePiggybacking);
try (OzoneClient client = newClient(cluster.getConf(), config)) {
Expand Down Expand Up @@ -761,4 +762,10 @@ void testDatanodeFailureWithPreAllocation(boolean flushDelay, boolean enablePigg
}
}

// Stops the datanode identified by dn and drops it from the cluster's datanode
// list, so later pipeline allocations no longer consider the dead node.
private void stopAndRemove(DatanodeDetails dn) throws IOException {
int index = cluster.getHddsDatanodeIndex(dn);
HddsDatanodeService service = cluster.getHddsDatanodes().remove(index);
service.stop();
service.join();
}

}

0 comments on commit 12def8a

Please sign in to comment.