Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add batch async shard fetch transport action for replica #8218 #8356

Merged
Merged
Show file tree
Hide file tree
Changes from 6 commits
Commits
Show all changes
28 commits
Select commit Hold shift + click to select a range
07ba88a
Add batch async shard fetch transport action for replica
sudarshan-baliga Jul 24, 2023
45aadc8
Add RSA Async batch shard fetch transport integ test
sudarshan-baliga Aug 3, 2023
e2b573c
Update the documentation of TransportNodesListShardStoreMetadataBatch.
sudarshan-baliga Aug 6, 2023
86e8ec1
Merge branch 'main' into async-shard-fetch-rsatransport
sudarshan-baliga Aug 6, 2023
344a4f1
Transport RSA refactor
shiv0408 Sep 25, 2023
24ec9a2
Correcting the reference of StoreFilesMetadata
shiv0408 Sep 27, 2023
88cb94e
Added helper class to remove code duplication
shiv0408 Dec 13, 2023
63682c1
Merge branch 'main' into async-shard-fetch-rsatransport
shiv0408 Jan 8, 2024
d623b28
Fix build failure after main merge
shiv0408 Jan 8, 2024
934ba3f
Removed AsyncBatchShardFetch
shiv0408 Jan 25, 2024
2d96a53
Address review comments
shiv0408 Jan 29, 2024
5bc6202
Spotless changes
shiv0408 Jan 29, 2024
875f4fa
Added UsingBatchAction suffix to ITs
shiv0408 Feb 1, 2024
4a4da6b
Merge branch 'main' into async-shard-fetch-rsatransport
shiv0408 Feb 2, 2024
5a328ca
Apply Spotless
shiv0408 Feb 2, 2024
5f5fbba
Fix tests
shiv0408 Feb 5, 2024
fc3ab43
Merge branch 'main' into async-shard-fetch-rsatransport
shiv0408 Feb 5, 2024
57960f9
Catch OpenSearchException as well during the batch flow
shiv0408 Feb 27, 2024
d07b86b
Merge branch 'opensearch-project:main' into async-shard-fetch-rsatran…
shiv0408 Feb 27, 2024
15af614
Address comments
shiv0408 Mar 12, 2024
d0d46ab
Apply spotless
shiv0408 Mar 12, 2024
f05dfc2
Handle index not found properly and return null in response
Mar 13, 2024
143c3d4
Catch all exceptions in Batch mode
shiv0408 Mar 13, 2024
62c11d1
Merge branch 'main' into async-shard-fetch-rsatransport
shiv0408 Mar 13, 2024
29871bf
Fix Integ Test
shiv0408 Mar 13, 2024
85faccc
Update Integration test
shiv0408 Mar 13, 2024
776f625
Remove BaseShardResponse as it'll be pushed later
Mar 14, 2024
2cc687f
Modify integ test accordingly
Mar 14, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
@@ -0,0 +1,76 @@
/*
* SPDX-License-Identifier: Apache-2.0
*
* The OpenSearch Contributors require contributions made to
* this file be licensed under the Apache-2.0 license or a
* compatible open source license.
*/

package org.opensearch.gateway;

import org.opensearch.action.admin.cluster.state.ClusterStateRequest;
import org.opensearch.action.admin.cluster.state.ClusterStateResponse;
import org.opensearch.cluster.metadata.IndexMetadata;
import org.opensearch.cluster.node.DiscoveryNode;
import org.opensearch.core.index.Index;
import org.opensearch.core.index.shard.ShardId;
import org.opensearch.env.NodeEnvironment;
import org.opensearch.index.shard.ShardPath;

import java.io.IOException;
import java.nio.file.DirectoryStream;
import java.nio.file.Files;
import java.nio.file.Path;
import java.util.HashMap;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
import java.util.concurrent.ExecutionException;

import static org.opensearch.test.OpenSearchIntegTestCase.client;
import static org.opensearch.test.OpenSearchIntegTestCase.internalCluster;
import static org.opensearch.test.OpenSearchIntegTestCase.resolveIndex;

/**
 * Static helpers shared by the batch shard-fetch integration tests: listing data nodes, building the
 * shard-to-custom-data-path request map, and damaging a shard's on-disk index folder.
 */
public class AsyncShardFetchBatchTestUtils {

    /**
     * Requests the cluster state (nodes and routing table, all indices) through the test client and
     * returns the data nodes it lists.
     *
     * @return the data nodes of the returned cluster state, as an array
     * @throws ExecutionException   if the cluster state request completes exceptionally
     * @throws InterruptedException if the calling thread is interrupted while waiting for the response
     */
    public static DiscoveryNode[] getDiscoveryNodes() throws ExecutionException, InterruptedException {
        final ClusterStateRequest request = new ClusterStateRequest();
        request.local(false);
        request.clear().nodes(true).routingTable(true).indices("*");
        final ClusterStateResponse stateResponse = client().admin().cluster().state(request).get();
        final List<DiscoveryNode> dataNodes = new LinkedList<>(stateResponse.getState().nodes().getDataNodes().values());
        return dataNodes.toArray(new DiscoveryNode[0]);
    }

    /**
     * Builds the request map for the given indices: for each index, shard ids {@code 0} to
     * {@code shardCount - 1} are mapped to the value of the index's
     * {@link IndexMetadata#INDEX_DATA_PATH_SETTING}, read from the index settings via the test client.
     *
     * @param indices    names of the indices to include
     * @param shardCount number of shard ids to generate per index
     * @return a mutable map from shard id to that index's custom data path
     */
    public static Map<ShardId, String> prepareRequestMap(String[] indices, int shardCount) {
        final Map<ShardId, String> requestMap = new HashMap<>();
        for (final String name : indices) {
            final Index resolved = resolveIndex(name);
            final String dataPath = IndexMetadata.INDEX_DATA_PATH_SETTING.get(
                client().admin().indices().prepareGetSettings(name).get().getIndexToSettings().get(name)
            );
            for (int shardNum = 0; shardNum < shardCount; shardNum++) {
                requestMap.put(new ShardId(resolved, shardNum), dataPath);
            }
        }
        return requestMap;
    }

    /**
     * Deletes every file whose name starts with {@code segments_} from the index folder of each
     * available shard path of {@code shardId} on the named node.
     *
     * @param nodeName name of the node whose {@link NodeEnvironment} is looked up in the internal cluster
     * @param shardId  shard whose index folder(s) are modified
     * @throws IOException          if listing the folder or deleting a file fails
     * @throws InterruptedException declared by the signature; nothing in this method body throws it
     */
    public static void corruptShard(String nodeName, ShardId shardId) throws IOException, InterruptedException {
        final NodeEnvironment nodeEnvironment = internalCluster().getInstance(NodeEnvironment.class, nodeName);
        for (final Path shardPath : nodeEnvironment.availableShardPaths(shardId)) {
            final Path indexDir = shardPath.resolve(ShardPath.INDEX_FOLDER_NAME);
            if (Files.exists(indexDir) == false) {
                // multi data path might only have one path in use
                continue;
            }
            try (DirectoryStream<Path> entries = Files.newDirectoryStream(indexDir)) {
                for (final Path entry : entries) {
                    final String fileName = entry.getFileName().toString();
                    if (fileName.startsWith("segments_")) {
                        Files.delete(entry);
                    }
                }
            }
        }
    }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,163 @@
/*
* SPDX-License-Identifier: Apache-2.0
*
* The OpenSearch Contributors require contributions made to
* this file be licensed under the Apache-2.0 license or a
* compatible open source license.
*/

package org.opensearch.indices.store;

import org.opensearch.Version;
import org.opensearch.action.admin.cluster.reroute.ClusterRerouteResponse;
import org.opensearch.action.admin.cluster.shards.ClusterSearchShardsGroup;
import org.opensearch.action.admin.cluster.shards.ClusterSearchShardsResponse;
import org.opensearch.action.support.ActionTestUtils;
import org.opensearch.cluster.node.DiscoveryNode;
import org.opensearch.cluster.routing.ShardRouting;
import org.opensearch.common.settings.Settings;
import org.opensearch.core.index.Index;
import org.opensearch.core.index.shard.ShardId;
import org.opensearch.test.OpenSearchIntegTestCase;

import java.util.Map;
import java.util.concurrent.ExecutionException;

import static java.util.Collections.emptyMap;
import static java.util.Collections.emptySet;
import static org.opensearch.cluster.metadata.IndexMetadata.SETTING_NUMBER_OF_REPLICAS;
import static org.opensearch.cluster.metadata.IndexMetadata.SETTING_NUMBER_OF_SHARDS;
import static org.opensearch.gateway.AsyncShardFetchBatchTestUtils.corruptShard;
import static org.opensearch.gateway.AsyncShardFetchBatchTestUtils.getDiscoveryNodes;
import static org.opensearch.gateway.AsyncShardFetchBatchTestUtils.prepareRequestMap;

public class TransportNodesListShardStoreMetadataBatchIT extends OpenSearchIntegTestCase {
shwetathareja marked this conversation as resolved.
Show resolved Hide resolved

/**
 * Creates index {@code test}, sends the batch store-metadata request to all data nodes, and checks
 * that the entry reported by the first data node for shard 0 is a success entry.
 */
public void testSingleShardStoreFetch() throws ExecutionException, InterruptedException {
    final String indexName = "test";
    final DiscoveryNode[] dataNodes = getDiscoveryNodes();
    final TransportNodesListShardStoreMetadataBatch.NodesStoreFilesMetadataBatch batchResponse = prepareAndSendRequest(
        new String[] { indexName },
        dataNodes
    );
    final ShardId shardId = new ShardId(resolveIndex(indexName), 0);
    // Only the first data node's entry for the shard is inspected.
    final TransportNodesListShardStoreMetadataBatch.NodeStoreFilesMetadata shardMetadata = batchResponse.getNodesMap()
        .get(dataNodes[0].getId())
        .getNodeStoreFilesMetadataBatch()
        .get(shardId);
    assertNodeStoreFilesMetadataSuccessCase(shardMetadata, shardId);
}

/**
 * Starts an additional node, creates indices {@code test1} and {@code test2}, sends one batch
 * request covering both, and checks the success entry for every shard copy on the node it is
 * currently routed to. Each shard group is expected to contain exactly two routings.
 */
public void testShardStoreFetchMultiNodeMultiIndexes() throws Exception {
    // start second node
    internalCluster().startNode();
    final String firstIndex = "test1";
    final String secondIndex = "test2";
    final DiscoveryNode[] dataNodes = getDiscoveryNodes();
    final TransportNodesListShardStoreMetadataBatch.NodesStoreFilesMetadataBatch batchResponse = prepareAndSendRequest(
        new String[] { firstIndex, secondIndex },
        dataNodes
    );
    final ClusterSearchShardsResponse shardsResponse = client().admin().cluster().prepareSearchShards(firstIndex, secondIndex).get();
    for (final ClusterSearchShardsGroup group : shardsResponse.getGroups()) {
        final ShardId shardId = group.getShardId();
        final ShardRouting[] copies = group.getShards();
        assertEquals(2, copies.length);
        for (final ShardRouting copy : copies) {
            final String nodeId = copy.currentNodeId();
            final TransportNodesListShardStoreMetadataBatch.NodeStoreFilesMetadata shardMetadata = batchResponse.getNodesMap()
                .get(nodeId)
                .getNodeStoreFilesMetadataBatch()
                .get(shardId);
            assertNodeStoreFilesMetadataSuccessCase(shardMetadata, shardId);
        }
    }
}

/**
 * Sends the batch request to a single fabricated node (id {@code foo}, fake transport address) and
 * expects the response to carry exactly one failure, attributed to that node's id.
 */
public void testShardStoreFetchNodeNotConnected() {
    final DiscoveryNode fakeNode = new DiscoveryNode("foo", buildNewFakeTransportAddress(), emptyMap(), emptySet(), Version.CURRENT);
    final String[] indexNames = { "test" };
    final DiscoveryNode[] targets = { fakeNode };
    final TransportNodesListShardStoreMetadataBatch.NodesStoreFilesMetadataBatch batchResponse = prepareAndSendRequest(
        indexNames,
        targets
    );
    assertTrue(batchResponse.hasFailures());
    assertEquals(1, batchResponse.failures().size());
    assertEquals(fakeNode.getId(), batchResponse.failures().get(0).nodeId());
}

/**
 * Starts an additional node, creates index {@code test} with one primary and one replica, deletes
 * the {@code segments_*} files of shard 0 on both nodes holding it, triggers a reroute, and then
 * expects the batch response entry from the first data node to be a failure entry.
 */
public void testShardStoreFetchCorruptedIndex() throws Exception {
    // start second node
    internalCluster().startNode();
    final String indexName = "test";
    final String[] indexNames = { indexName };
    prepareIndices(indexNames, 1, 1);
    final Map<ShardId, String> requestMap = prepareRequestMap(indexNames, 1);
    final Index index = resolveIndex(indexName);
    final ShardId shardId = new ShardId(index, 0);

    final ClusterSearchShardsResponse shardsResponse = client().admin().cluster().prepareSearchShards(indexName).get();
    final DiscoveryNode[] shardNodes = shardsResponse.getNodes();
    assertEquals(2, shardNodes.length);
    // Exactly two nodes at this point (asserted above); damage the shard copy on each in order.
    for (final DiscoveryNode shardNode : shardNodes) {
        corruptShard(shardNode.getName(), shardId);
    }

    ClusterRerouteResponse rerouteResponse = client().admin().cluster().prepareReroute().setRetryFailed(false).get();
    final DiscoveryNode[] dataNodes = getDiscoveryNodes();
    final TransportNodesListShardStoreMetadataBatch.NodesStoreFilesMetadataBatch batchResponse = ActionTestUtils.executeBlocking(
        internalCluster().getInstance(TransportNodesListShardStoreMetadataBatch.class),
        new TransportNodesListShardStoreMetadataBatch.Request(requestMap, dataNodes)
    );
    final TransportNodesListShardStoreMetadataBatch.NodeStoreFilesMetadata shardMetadata = batchResponse.getNodesMap()
        .get(dataNodes[0].getId())
        .getNodeStoreFilesMetadataBatch()
        .get(shardId);
    assertNodeStoreFilesMetadataFailureCase(shardMetadata, shardId);
}

/**
 * Creates each named index with the given primary/replica counts, indexes one document
 * (type {@code type}, id {@code 1}) into it, and flushes it.
 *
 * @param indices               names of the indices to create
 * @param numberOfPrimaryShards value for {@code SETTING_NUMBER_OF_SHARDS}
 * @param numberOfReplicaShards value for {@code SETTING_NUMBER_OF_REPLICAS}
 */
private void prepareIndices(String[] indices, int numberOfPrimaryShards, int numberOfReplicaShards) {
    // The settings do not depend on the index name, so build them once and reuse them.
    final Settings indexSettings = Settings.builder()
        .put(SETTING_NUMBER_OF_SHARDS, numberOfPrimaryShards)
        .put(SETTING_NUMBER_OF_REPLICAS, numberOfReplicaShards)
        .build();
    for (final String indexName : indices) {
        createIndex(indexName, indexSettings);
        index(indexName, "type", "1");
        flush(indexName);
    }
}

/**
 * Creates the given indices (one primary, one replica each), builds the shard-to-custom-data-path
 * map for shard 0 of every index, and executes the batch store-metadata action against
 * {@code nodes}, blocking until the response is available.
 *
 * @param indices names of the indices to create and query
 * @param nodes   nodes the batch request is addressed to
 * @return the response of the batch store-metadata action
 */
private TransportNodesListShardStoreMetadataBatch.NodesStoreFilesMetadataBatch prepareAndSendRequest(
    String[] indices,
    DiscoveryNode[] nodes
) {
    // The indices must exist before the request map is built, because building it resolves each
    // index and reads its settings.
    prepareIndices(indices, 1, 1);
    final Map<ShardId, String> shardIdCustomDataPathMap = prepareRequestMap(indices, 1);
    return ActionTestUtils.executeBlocking(
        internalCluster().getInstance(TransportNodesListShardStoreMetadataBatch.class),
        new TransportNodesListShardStoreMetadataBatch.Request(shardIdCustomDataPathMap, nodes)
    );
}

/**
 * Asserts the shape of a failed per-shard entry: a fetch exception is present, the store metadata
 * refers to {@code shardId}, and its peer-recovery retention leases are empty.
 *
 * @param nodeStoreFilesMetadata per-shard entry taken from a node's batch response
 * @param shardId                shard the entry is expected to describe
 */
private void assertNodeStoreFilesMetadataFailureCase(
    TransportNodesListShardStoreMetadataBatch.NodeStoreFilesMetadata nodeStoreFilesMetadata,
    ShardId shardId
) {
    assertNotNull(nodeStoreFilesMetadata.getStoreFileFetchException());
    final TransportNodesListShardStoreMetadataBatch.StoreFilesMetadata store = nodeStoreFilesMetadata.storeFilesMetadata();
    assertEquals(shardId, store.shardId());
    assertTrue(store.peerRecoveryRetentionLeases().isEmpty());
}

/**
 * Asserts the shape of a successful per-shard entry: no fetch exception, non-empty store metadata
 * that refers to {@code shardId}, and a non-null peer-recovery retention lease collection.
 *
 * @param nodeStoreFilesMetadata per-shard entry taken from a node's batch response
 * @param shardId                shard the entry is expected to describe
 */
private void assertNodeStoreFilesMetadataSuccessCase(
    TransportNodesListShardStoreMetadataBatch.NodeStoreFilesMetadata nodeStoreFilesMetadata,
    ShardId shardId
) {
    assertNull(nodeStoreFilesMetadata.getStoreFileFetchException());
    final TransportNodesListShardStoreMetadataBatch.StoreFilesMetadata store = nodeStoreFilesMetadata.storeFilesMetadata();
    assertFalse(store.isEmpty());
    assertEquals(shardId, store.shardId());
    assertNotNull(store.peerRecoveryRetentionLeases());
}
}
sudarshan-baliga marked this conversation as resolved.
Show resolved Hide resolved
shwetathareja marked this conversation as resolved.
Show resolved Hide resolved
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
/*
* SPDX-License-Identifier: Apache-2.0
*
* The OpenSearch Contributors require contributions made to
* this file be licensed under the Apache-2.0 license or a
* compatible open source license.
*/

package org.opensearch.gateway;

import org.opensearch.action.ActionListener;
import org.opensearch.action.support.nodes.BaseNodeResponse;
import org.opensearch.action.support.nodes.BaseNodesResponse;
import org.opensearch.cluster.node.DiscoveryNode;
import org.opensearch.common.lease.Releasable;
import org.opensearch.core.index.shard.ShardId;

import java.util.Map;

/**
* This class is responsible for fetching shard data from nodes. It is analogous to the AsyncShardFetch class, except
* that here a batch of shards is fetched with a single transport request per node.
* <p>
* NOTE(review): as visible here the class declares only the nested {@link Lister} contract; no fetch logic and no
* member referencing {@code T} is present in this body.
*
* @param <T> node-level response type; must extend {@link BaseNodeResponse}
*
* @opensearch.internal
*/
public abstract class AsyncBatchShardFetch<T extends BaseNodeResponse> implements Releasable {
/**
* An action that lists the relevant shard data that needs to be fetched.
*
* @param <NodesResponse> aggregated response type handed to the listener; a {@link BaseNodesResponse} of
*                        {@code NodeResponse}
* @param <NodeResponse>  per-node response type; must extend {@link BaseNodeResponse}
*/
public interface Lister<NodesResponse extends BaseNodesResponse<NodeResponse>, NodeResponse extends BaseNodeResponse> {
/**
* Lists shard data for the given shards on the given nodes and reports the outcome to {@code listener}.
*
* @param nodes                 nodes to query
* @param shardToCustomDataPath shards to query, keyed by shard id; going by the parameter name the value is
*                              presumably the shard's custom data path (TODO confirm against implementations)
* @param listener              receives the {@code NodesResponse}
*/
void list(DiscoveryNode[] nodes, Map<ShardId, String> shardToCustomDataPath, ActionListener<NodesResponse> listener);
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -106,7 +106,7 @@ public void processExistingRecoveries(RoutingAllocation allocation) {
assert primaryShard != null : "the replica shard can be allocated on at least one node, so there must be an active primary";
assert primaryShard.currentNodeId() != null;
final DiscoveryNode primaryNode = allocation.nodes().get(primaryShard.currentNodeId());
final TransportNodesListShardStoreMetadata.StoreFilesMetadata primaryStore = findStore(primaryNode, shardStores);
final StoreFilesMetadata primaryStore = findStore(primaryNode, shardStores);
if (primaryStore == null) {
// if we can't find the primary data, it is probably because the primary shard is corrupted (and listing failed)
// just let the recovery find it out, no need to do anything about it for the initializing shard
Expand Down Expand Up @@ -223,7 +223,7 @@ public AllocateUnassignedDecision makeAllocationDecision(
}
assert primaryShard.currentNodeId() != null;
final DiscoveryNode primaryNode = allocation.nodes().get(primaryShard.currentNodeId());
final TransportNodesListShardStoreMetadata.StoreFilesMetadata primaryStore = findStore(primaryNode, shardStores);
final StoreFilesMetadata primaryStore = findStore(primaryNode, shardStores);
if (primaryStore == null) {
// if we can't find the primary data, it is probably because the primary shard is corrupted (and listing failed)
// we want to let the replica be allocated in order to expose the actual problem with the primary that the replica
Expand Down Expand Up @@ -357,7 +357,7 @@ private static List<NodeAllocationResult> augmentExplanationsWithStoreInfo(
/**
* Finds the store for the assigned shard in the fetched data, returns null if none is found.
*/
private static TransportNodesListShardStoreMetadata.StoreFilesMetadata findStore(
private static StoreFilesMetadata findStore(
DiscoveryNode node,
AsyncShardFetch.FetchResult<NodeStoreFilesMetadata> data
) {
Expand All @@ -373,7 +373,7 @@ private MatchingNodes findMatchingNodes(
RoutingAllocation allocation,
boolean noMatchFailedNodes,
DiscoveryNode primaryNode,
TransportNodesListShardStoreMetadata.StoreFilesMetadata primaryStore,
StoreFilesMetadata primaryStore,
AsyncShardFetch.FetchResult<NodeStoreFilesMetadata> data,
boolean explain
) {
Expand All @@ -386,7 +386,7 @@ private MatchingNodes findMatchingNodes(
&& shard.unassignedInfo().getFailedNodeIds().contains(discoNode.getId())) {
continue;
}
TransportNodesListShardStoreMetadata.StoreFilesMetadata storeFilesMetadata = nodeStoreEntry.getValue().storeFilesMetadata();
StoreFilesMetadata storeFilesMetadata = nodeStoreEntry.getValue().storeFilesMetadata();
// we don't have any files at all, it is an empty index
if (storeFilesMetadata.isEmpty()) {
continue;
Expand Down Expand Up @@ -442,8 +442,8 @@ private MatchingNodes findMatchingNodes(
}

private static long computeMatchingBytes(
TransportNodesListShardStoreMetadata.StoreFilesMetadata primaryStore,
TransportNodesListShardStoreMetadata.StoreFilesMetadata storeFilesMetadata
StoreFilesMetadata primaryStore,
StoreFilesMetadata storeFilesMetadata
) {
long sizeMatched = 0;
for (StoreFileMetadata storeFileMetadata : storeFilesMetadata) {
Expand All @@ -456,18 +456,18 @@ private static long computeMatchingBytes(
}

private static boolean hasMatchingSyncId(
TransportNodesListShardStoreMetadata.StoreFilesMetadata primaryStore,
TransportNodesListShardStoreMetadata.StoreFilesMetadata replicaStore
StoreFilesMetadata primaryStore,
StoreFilesMetadata replicaStore
) {
String primarySyncId = primaryStore.syncId();
return primarySyncId != null && primarySyncId.equals(replicaStore.syncId());
}

private static MatchingNode computeMatchingNode(
DiscoveryNode primaryNode,
TransportNodesListShardStoreMetadata.StoreFilesMetadata primaryStore,
StoreFilesMetadata primaryStore,
DiscoveryNode replicaNode,
TransportNodesListShardStoreMetadata.StoreFilesMetadata replicaStore
StoreFilesMetadata replicaStore
) {
final long retainingSeqNoForPrimary = primaryStore.getPeerRecoveryRetentionLeaseRetainingSeqNo(primaryNode);
final long retainingSeqNoForReplica = primaryStore.getPeerRecoveryRetentionLeaseRetainingSeqNo(replicaNode);
Expand All @@ -478,7 +478,7 @@ private static MatchingNode computeMatchingNode(
}

private static boolean canPerformOperationBasedRecovery(
TransportNodesListShardStoreMetadata.StoreFilesMetadata primaryStore,
StoreFilesMetadata primaryStore,
AsyncShardFetch.FetchResult<NodeStoreFilesMetadata> shardStores,
DiscoveryNode targetNode
) {
Expand Down
Loading