From d69d609829a091f8f257649e87cf9ea358f589b1 Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" Date: Sat, 31 Aug 2024 10:45:46 +0000 Subject: [PATCH] Add pinned timestamp utils and setting to enable/disable the feature (#15401) Signed-off-by: Sachin Kale Co-authored-by: Sachin Kale (cherry picked from commit 90148942a56fa6a4840ad2afed195071f2d3c8e6) Signed-off-by: github-actions[bot] --- .../RemoteStorePinnedTimestampsIT.java | 10 + .../common/settings/ClusterSettings.java | 1 + .../index/remote/RemoteStoreUtils.java | 176 ++++++ .../store/RemoteSegmentStoreDirectory.java | 5 + .../indices/RemoteStoreSettings.java | 15 + .../main/java/org/opensearch/node/Node.java | 3 +- .../index/remote/RemoteStoreUtilsTests.java | 545 ++++++++++++++++++ 7 files changed, 754 insertions(+), 1 deletion(-) diff --git a/server/src/internalClusterTest/java/org/opensearch/remotestore/RemoteStorePinnedTimestampsIT.java b/server/src/internalClusterTest/java/org/opensearch/remotestore/RemoteStorePinnedTimestampsIT.java index 05ff738d2df0b..cb91c63e17245 100644 --- a/server/src/internalClusterTest/java/org/opensearch/remotestore/RemoteStorePinnedTimestampsIT.java +++ b/server/src/internalClusterTest/java/org/opensearch/remotestore/RemoteStorePinnedTimestampsIT.java @@ -9,8 +9,10 @@ package org.opensearch.remotestore; import org.opensearch.common.collect.Tuple; +import org.opensearch.common.settings.Settings; import org.opensearch.common.unit.TimeValue; import org.opensearch.core.action.ActionListener; +import org.opensearch.indices.RemoteStoreSettings; import org.opensearch.node.remotestore.RemoteStorePinnedTimestampService; import org.opensearch.test.OpenSearchIntegTestCase; @@ -20,6 +22,14 @@ public class RemoteStorePinnedTimestampsIT extends RemoteStoreBaseIntegTestCase { static final String INDEX_NAME = "remote-store-test-idx-1"; + @Override + protected Settings nodeSettings(int nodeOrdinal) { + return Settings.builder() + .put(super.nodeSettings(nodeOrdinal)) + .put(RemoteStoreSettings.CLUSTER_REMOTE_STORE_PINNED_TIMESTAMP_ENABLED.getKey(), true) + .build(); + } + ActionListener noOpActionListener = new ActionListener<>() { @Override public void onResponse(Void unused) {} diff --git a/server/src/main/java/org/opensearch/common/settings/ClusterSettings.java b/server/src/main/java/org/opensearch/common/settings/ClusterSettings.java index e4e3f13b6707f..eab99c5e78b0c 100644 --- a/server/src/main/java/org/opensearch/common/settings/ClusterSettings.java +++ b/server/src/main/java/org/opensearch/common/settings/ClusterSettings.java @@ -764,6 +764,7 @@ public void apply(Settings value, Settings current, Settings previous) { RemoteStoreSettings.CLUSTER_REMOTE_STORE_TRANSLOG_METADATA, RemoteStoreSettings.CLUSTER_REMOTE_STORE_PINNED_TIMESTAMP_SCHEDULER_INTERVAL, RemoteStoreSettings.CLUSTER_REMOTE_STORE_PINNED_TIMESTAMP_LOOKBACK_INTERVAL, + RemoteStoreSettings.CLUSTER_REMOTE_STORE_PINNED_TIMESTAMP_ENABLED, SystemTemplatesService.SETTING_APPLICATION_BASED_CONFIGURATION_TEMPLATES_ENABLED, diff --git a/server/src/main/java/org/opensearch/index/remote/RemoteStoreUtils.java b/server/src/main/java/org/opensearch/index/remote/RemoteStoreUtils.java index a5e0c10f72301..b2bc8a0294a49 100644 --- a/server/src/main/java/org/opensearch/index/remote/RemoteStoreUtils.java +++ b/server/src/main/java/org/opensearch/index/remote/RemoteStoreUtils.java @@ -20,20 +20,27 @@ import org.opensearch.cluster.routing.RoutingTable; import org.opensearch.common.collect.Tuple; import org.opensearch.common.settings.Settings; +import org.opensearch.indices.RemoteStoreSettings; import org.opensearch.node.remotestore.RemoteStoreNodeAttribute; import org.opensearch.node.remotestore.RemoteStoreNodeService; +import org.opensearch.node.remotestore.RemoteStorePinnedTimestampService; import java.nio.ByteBuffer; +import java.util.ArrayList; import java.util.Arrays; import java.util.Base64; import java.util.Collection; import java.util.Collections; import java.util.HashMap; +import java.util.HashSet; +import java.util.Iterator; import java.util.List; import java.util.Locale; import java.util.Map; import java.util.Objects; import java.util.Optional; +import java.util.Set; +import java.util.TreeSet; import java.util.function.Function; import java.util.stream.Collectors; @@ -373,4 +380,173 @@ public static boolean isSwitchToStrictCompatibilityMode(ClusterUpdateSettingsReq incomingSettings ) == RemoteStoreNodeService.CompatibilityMode.STRICT; } + + /** + * Determines and returns a set of metadata files that match provided pinned timestamps. + * + * This method is an overloaded version of getPinnedTimestampLockedFiles and do not use cached entries to find + * the metadata file + * + * @param metadataFiles A list of metadata file names. Expected to be sorted in descending order of timestamp. + * @param pinnedTimestampSet A set of timestamps representing pinned points in time. + * @param getTimestampFunction A function that extracts the timestamp from a metadata file name. + * @param prefixFunction A function that extracts a tuple of prefix information from a metadata file name. + * @return A set of metadata file names that are implicitly locked based on the pinned timestamps. + */ + public static Set getPinnedTimestampLockedFiles( + List metadataFiles, + Set pinnedTimestampSet, + Function getTimestampFunction, + Function> prefixFunction + ) { + return getPinnedTimestampLockedFiles(metadataFiles, pinnedTimestampSet, new HashMap<>(), getTimestampFunction, prefixFunction); + } + + /** + * Determines and returns a set of metadata files that match provided pinned timestamps. If pinned timestamp + * feature is not enabled, this function is a no-op. + * + * This method identifies metadata files that are considered implicitly locked due to their timestamps + * matching or being the closest preceding timestamp to the pinned timestamps. It uses a caching mechanism + * to improve performance for previously processed timestamps. + * + * The method performs the following steps: + * 1. Validates input parameters. + * 2. Updates the cache (metadataFilePinnedTimestampMap) to remove outdated entries. + * 3. Processes cached entries and identifies new timestamps to process. + * 4. For new timestamps, iterates through metadata files to find matching or closest preceding files. + * 5. Updates the cache with newly processed timestamps and their corresponding metadata files. + * + * @param metadataFiles A list of metadata file names. Expected to be sorted in descending order of timestamp. + * @param pinnedTimestampSet A set of timestamps representing pinned points in time. + * @param metadataFilePinnedTimestampMap A map used for caching processed timestamps and their corresponding metadata files. + * @param getTimestampFunction A function that extracts the timestamp from a metadata file name. + * @param prefixFunction A function that extracts a tuple of prefix information from a metadata file name. + * @return A set of metadata file names that are implicitly locked based on the pinned timestamps. + * + */ + public static Set getPinnedTimestampLockedFiles( + List metadataFiles, + Set pinnedTimestampSet, + Map metadataFilePinnedTimestampMap, + Function getTimestampFunction, + Function> prefixFunction + ) { + Set implicitLockedFiles = new HashSet<>(); + + if (RemoteStoreSettings.isPinnedTimestampsEnabled() == false) { + return implicitLockedFiles; + } + + if (metadataFiles == null || metadataFiles.isEmpty() || pinnedTimestampSet == null) { + return implicitLockedFiles; + } + + // Remove entries for timestamps that are no longer pinned + metadataFilePinnedTimestampMap.keySet().retainAll(pinnedTimestampSet); + + // Add cached entries and collect new timestamps + Set newPinnedTimestamps = new TreeSet<>(Collections.reverseOrder()); + for (Long pinnedTimestamp : pinnedTimestampSet) { + String cachedFile = metadataFilePinnedTimestampMap.get(pinnedTimestamp); + if (cachedFile != null) { + implicitLockedFiles.add(cachedFile); + } else { + newPinnedTimestamps.add(pinnedTimestamp); + } + } + + if (newPinnedTimestamps.isEmpty()) { + return implicitLockedFiles; + } + + // Sort metadata files in descending order of timestamp + // ToDo: Do we really need this? Files fetched from remote store are already lexicographically sorted. + metadataFiles.sort(String::compareTo); + + // If we have metadata files from multiple writers, it can result in picking file generated by stale primary. + // To avoid this, we fail fast. + RemoteStoreUtils.verifyNoMultipleWriters(metadataFiles, prefixFunction); + + Iterator timestampIterator = newPinnedTimestamps.iterator(); + Long currentPinnedTimestamp = timestampIterator.next(); + long prevMdTimestamp = Long.MAX_VALUE; + for (String metadataFileName : metadataFiles) { + long currentMdTimestamp = getTimestampFunction.apply(metadataFileName); + // We always prefer md file with higher values of prefix like primary term, generation etc. + if (currentMdTimestamp > prevMdTimestamp) { + continue; + } + while (currentMdTimestamp <= currentPinnedTimestamp && prevMdTimestamp > currentPinnedTimestamp) { + implicitLockedFiles.add(metadataFileName); + // Do not cache entry for latest metadata file as the next metadata can also match the same pinned timestamp + if (prevMdTimestamp != Long.MAX_VALUE) { + metadataFilePinnedTimestampMap.put(currentPinnedTimestamp, metadataFileName); + } + if (timestampIterator.hasNext() == false) { + return implicitLockedFiles; + } + currentPinnedTimestamp = timestampIterator.next(); + } + prevMdTimestamp = currentMdTimestamp; + } + + return implicitLockedFiles; + } + + /** + * Filters out metadata files based on their age and pinned timestamps settings. + * + * This method filters a list of metadata files, keeping only those that are older + * than a certain threshold determined by the last successful fetch of pinned timestamps + * and a configured lookback interval. + * + * @param metadataFiles A list of metadata file names to be filtered. + * @param getTimestampFunction A function that extracts a timestamp from a metadata file name. + * @param lastSuccessfulFetchOfPinnedTimestamps The timestamp of the last successful fetch of pinned timestamps. + * @return A new list containing only the metadata files that meet the age criteria. + * If pinned timestamps are not enabled, returns a copy of the input list. + */ + public static List filterOutMetadataFilesBasedOnAge( + List metadataFiles, + Function getTimestampFunction, + long lastSuccessfulFetchOfPinnedTimestamps + ) { + if (RemoteStoreSettings.isPinnedTimestampsEnabled() == false) { + return new ArrayList<>(metadataFiles); + } + long maximumAllowedTimestamp = lastSuccessfulFetchOfPinnedTimestamps - RemoteStoreSettings.getPinnedTimestampsLookbackInterval() + .getMillis(); + List metadataFilesWithMinAge = new ArrayList<>(); + for (String metadataFileName : metadataFiles) { + long metadataTimestamp = getTimestampFunction.apply(metadataFileName); + if (metadataTimestamp < maximumAllowedTimestamp) { + metadataFilesWithMinAge.add(metadataFileName); + } + } + return metadataFilesWithMinAge; + } + + /** + * Determines if the pinned timestamp state is stale. + * + * This method checks whether the last successful fetch of pinned timestamps + * is considered stale based on the current time and configured intervals. + * The state is considered stale if the last successful fetch occurred before + * a certain threshold, which is calculated as three times the scheduler interval + * plus the lookback interval. + * + * @return true if the pinned timestamp state is stale, false otherwise. + * Always returns false if pinned timestamps are not enabled. + */ + public static boolean isPinnedTimestampStateStale() { + if (RemoteStoreSettings.isPinnedTimestampsEnabled() == false) { + return false; + } + long lastSuccessfulFetchTimestamp = RemoteStorePinnedTimestampService.getPinnedTimestamps().v1(); + long staleBufferInMillis = (RemoteStoreSettings.getPinnedTimestampsSchedulerInterval().millis() * 3) + RemoteStoreSettings + .getPinnedTimestampsLookbackInterval() + .millis(); + return lastSuccessfulFetchTimestamp < (System.currentTimeMillis() - staleBufferInMillis); + } } diff --git a/server/src/main/java/org/opensearch/index/store/RemoteSegmentStoreDirectory.java b/server/src/main/java/org/opensearch/index/store/RemoteSegmentStoreDirectory.java index 3e4e027922285..af94adb0266be 100644 --- a/server/src/main/java/org/opensearch/index/store/RemoteSegmentStoreDirectory.java +++ b/server/src/main/java/org/opensearch/index/store/RemoteSegmentStoreDirectory.java @@ -349,6 +349,11 @@ static long getGeneration(String[] filenameTokens) { return RemoteStoreUtils.invertLong(filenameTokens[2]); } + public static long getTimestamp(String filename) { + String[] filenameTokens = filename.split(SEPARATOR); + return RemoteStoreUtils.invertLong(filenameTokens[6]); + } + public static Tuple getNodeIdByPrimaryTermAndGen(String filename) { String[] tokens = filename.split(SEPARATOR); if (tokens.length < 8) { diff --git a/server/src/main/java/org/opensearch/indices/RemoteStoreSettings.java b/server/src/main/java/org/opensearch/indices/RemoteStoreSettings.java index 495288626627b..55280ca5c96d6 100644 --- a/server/src/main/java/org/opensearch/indices/RemoteStoreSettings.java +++ b/server/src/main/java/org/opensearch/indices/RemoteStoreSettings.java @@ -134,6 +134,15 @@ public class RemoteStoreSettings { Property.Dynamic ); + /** + * Controls pinned timestamp feature enablement + */ + public static final Setting CLUSTER_REMOTE_STORE_PINNED_TIMESTAMP_ENABLED = Setting.boolSetting( + "cluster.remote_store.pinned_timestamps.enabled", + false, + Setting.Property.NodeScope + ); + /** * Controls pinned timestamp scheduler interval */ @@ -163,6 +172,7 @@ public class RemoteStoreSettings { private volatile RemoteStoreEnums.PathHashAlgorithm pathHashAlgorithm; private volatile int maxRemoteTranslogReaders; private volatile boolean isTranslogMetadataEnabled; + private static volatile boolean isPinnedTimestampsEnabled; private static volatile TimeValue pinnedTimestampsSchedulerInterval; private static volatile TimeValue pinnedTimestampsLookbackInterval; @@ -205,6 +215,7 @@ public RemoteStoreSettings(Settings settings, ClusterSettings clusterSettings) { pinnedTimestampsSchedulerInterval = CLUSTER_REMOTE_STORE_PINNED_TIMESTAMP_SCHEDULER_INTERVAL.get(settings); pinnedTimestampsLookbackInterval = CLUSTER_REMOTE_STORE_PINNED_TIMESTAMP_LOOKBACK_INTERVAL.get(settings); + isPinnedTimestampsEnabled = CLUSTER_REMOTE_STORE_PINNED_TIMESTAMP_ENABLED.get(settings); } public TimeValue getClusterRemoteTranslogBufferInterval() { @@ -280,4 +291,8 @@ public static TimeValue getPinnedTimestampsSchedulerInterval() { public static TimeValue getPinnedTimestampsLookbackInterval() { return pinnedTimestampsLookbackInterval; } + + public static boolean isPinnedTimestampsEnabled() { + return isPinnedTimestampsEnabled; + } } diff --git a/server/src/main/java/org/opensearch/node/Node.java b/server/src/main/java/org/opensearch/node/Node.java index 97b8e58f03e5c..b3e6c419aae0d 100644 --- a/server/src/main/java/org/opensearch/node/Node.java +++ b/server/src/main/java/org/opensearch/node/Node.java @@ -304,6 +304,7 @@ import static org.opensearch.common.util.FeatureFlags.TELEMETRY; import static org.opensearch.env.NodeEnvironment.collectFileCacheDataPath; import static org.opensearch.index.ShardIndexingPressureSettings.SHARD_INDEXING_PRESSURE_ENABLED_ATTRIBUTE_KEY; +import static org.opensearch.indices.RemoteStoreSettings.CLUSTER_REMOTE_STORE_PINNED_TIMESTAMP_ENABLED; import static org.opensearch.node.remotestore.RemoteStoreNodeAttribute.isRemoteStoreAttributePresent; import static org.opensearch.node.remotestore.RemoteStoreNodeAttribute.isRemoteStoreClusterStateEnabled; @@ -811,7 +812,7 @@ protected Node( remoteClusterStateCleanupManager = null; } final RemoteStorePinnedTimestampService remoteStorePinnedTimestampService; - if (isRemoteStoreAttributePresent(settings)) { + if (isRemoteStoreAttributePresent(settings) && CLUSTER_REMOTE_STORE_PINNED_TIMESTAMP_ENABLED.get(settings)) { remoteStorePinnedTimestampService = new RemoteStorePinnedTimestampService( repositoriesServiceReference::get, settings, diff --git a/server/src/test/java/org/opensearch/index/remote/RemoteStoreUtilsTests.java b/server/src/test/java/org/opensearch/index/remote/RemoteStoreUtilsTests.java index ec48032df4a15..ceaee8337ae34 100644 --- a/server/src/test/java/org/opensearch/index/remote/RemoteStoreUtilsTests.java +++ b/server/src/test/java/org/opensearch/index/remote/RemoteStoreUtilsTests.java @@ -25,23 +25,29 @@ import org.opensearch.common.UUIDs; import org.opensearch.common.blobstore.BlobMetadata; import org.opensearch.common.blobstore.support.PlainBlobMetadata; +import org.opensearch.common.collect.Tuple; +import org.opensearch.common.settings.ClusterSettings; import org.opensearch.common.settings.Settings; import org.opensearch.core.index.Index; import org.opensearch.core.index.shard.ShardId; import org.opensearch.index.shard.IndexShardTestUtils; import org.opensearch.index.store.RemoteSegmentStoreDirectory; import org.opensearch.index.translog.transfer.TranslogTransferMetadata; +import org.opensearch.indices.RemoteStoreSettings; import org.opensearch.indices.replication.common.ReplicationType; import org.opensearch.node.remotestore.RemoteStoreNodeAttribute; import org.opensearch.test.OpenSearchTestCase; import java.math.BigInteger; import java.util.ArrayList; +import java.util.Arrays; import java.util.Collections; import java.util.HashMap; +import java.util.HashSet; import java.util.LinkedList; import java.util.List; import java.util.Map; +import java.util.Set; import java.util.UUID; import java.util.stream.Collectors; @@ -60,6 +66,7 @@ import static org.opensearch.index.store.RemoteSegmentStoreDirectory.MetadataFilenameUtils.METADATA_PREFIX; import static org.opensearch.index.store.RemoteSegmentStoreDirectory.MetadataFilenameUtils.SEPARATOR; import static org.opensearch.index.translog.transfer.TranslogTransferMetadata.METADATA_SEPARATOR; +import static org.opensearch.indices.RemoteStoreSettings.CLUSTER_REMOTE_STORE_PINNED_TIMESTAMP_ENABLED; import static org.opensearch.node.remotestore.RemoteStoreNodeAttribute.REMOTE_STORE_SEGMENT_REPOSITORY_NAME_ATTRIBUTE_KEY; import static org.opensearch.node.remotestore.RemoteStoreNodeAttribute.REMOTE_STORE_TRANSLOG_REPOSITORY_NAME_ATTRIBUTE_KEY; import static org.opensearch.node.remotestore.RemoteStoreNodeService.REMOTE_STORE_COMPATIBILITY_MODE_SETTING; @@ -537,4 +544,542 @@ private Map getRemoteStoreNodeAttributes() { remoteStoreNodeAttributes.put(REMOTE_STORE_TRANSLOG_REPOSITORY_NAME_ATTRIBUTE_KEY, "my-translog-repo-1"); return remoteStoreNodeAttributes; } + + private void setupRemotePinnedTimestampFeature(boolean enabled) { + RemoteStoreSettings remoteStoreSettings = new RemoteStoreSettings( + Settings.builder().put(CLUSTER_REMOTE_STORE_PINNED_TIMESTAMP_ENABLED.getKey(), enabled).build(), + new ClusterSettings(Settings.EMPTY, ClusterSettings.BUILT_IN_CLUSTER_SETTINGS) + ); + } + + public void testGetPinnedTimestampLockedFilesFeatureDisabled() { + setupRemotePinnedTimestampFeature(false); + // Pinned timestamps 800, 900, 1000, 2000 + // Metadata with timestamp 990, 995, 1000, 1001 + // Metadata timestamp 1000 <= Pinned Timestamp 1000 + // Metadata timestamp 1001 <= Pinned Timestamp 2000 + Map metadataFilePinnedTimestampCache = new HashMap<>(); + Tuple, Set> metadataAndLocks = testGetPinnedTimestampLockedFilesWithPinnedTimestamps( + List.of(990L, 995L, 1000L, 1001L), + Set.of(800L, 900L, 1000L, 2000L), + metadataFilePinnedTimestampCache + ); + Map metadataFiles = metadataAndLocks.v1(); + Set implicitLockedFiles = metadataAndLocks.v2(); + + assertEquals(0, implicitLockedFiles.size()); + assertEquals(0, metadataFilePinnedTimestampCache.size()); + } + + public void testGetPinnedTimestampLockedFilesWithEmptyMetadataFiles() { + setupRemotePinnedTimestampFeature(true); + List metadataFiles = Collections.emptyList(); + Set pinnedTimestampSet = new HashSet<>(Arrays.asList(1L, 2L, 3L)); + Set implicitLockedFiles = RemoteStoreUtils.getPinnedTimestampLockedFiles( + metadataFiles, + pinnedTimestampSet, + new HashMap<>(), + RemoteSegmentStoreDirectory.MetadataFilenameUtils::getTimestamp, + RemoteSegmentStoreDirectory.MetadataFilenameUtils::getNodeIdByPrimaryTermAndGen + ); + assertTrue(implicitLockedFiles.isEmpty()); + } + + public void testGetPinnedTimestampLockedFilesWithNoPinnedTimestamps() { + setupRemotePinnedTimestampFeature(true); + List metadataFiles = Arrays.asList("file1.txt", "file2.txt", "file3.txt"); + Set pinnedTimestampSet = Collections.emptySet(); + Set implicitLockedFiles = RemoteStoreUtils.getPinnedTimestampLockedFiles( + metadataFiles, + pinnedTimestampSet, + new HashMap<>(), + RemoteSegmentStoreDirectory.MetadataFilenameUtils::getTimestamp, + RemoteSegmentStoreDirectory.MetadataFilenameUtils::getNodeIdByPrimaryTermAndGen + ); + assertTrue(implicitLockedFiles.isEmpty()); + } + + public void testGetPinnedTimestampLockedFilesWithNullMetadataFiles() { + setupRemotePinnedTimestampFeature(true); + List metadataFiles = null; + Set pinnedTimestampSet = new HashSet<>(Arrays.asList(1L, 2L, 3L)); + Set implicitLockedFiles = RemoteStoreUtils.getPinnedTimestampLockedFiles( + metadataFiles, + pinnedTimestampSet, + new HashMap<>(), + RemoteSegmentStoreDirectory.MetadataFilenameUtils::getTimestamp, + RemoteSegmentStoreDirectory.MetadataFilenameUtils::getNodeIdByPrimaryTermAndGen + ); + assertTrue(implicitLockedFiles.isEmpty()); + } + + public void testGetPinnedTimestampLockedFilesWithNullPinnedTimestampSet() { + setupRemotePinnedTimestampFeature(true); + List metadataFiles = Arrays.asList("file1.txt", "file2.txt", "file3.txt"); + Set pinnedTimestampSet = null; + Set implicitLockedFiles = RemoteStoreUtils.getPinnedTimestampLockedFiles( + metadataFiles, + pinnedTimestampSet, + new HashMap<>(), + RemoteSegmentStoreDirectory.MetadataFilenameUtils::getTimestamp, + RemoteSegmentStoreDirectory.MetadataFilenameUtils::getNodeIdByPrimaryTermAndGen + ); + assertTrue(implicitLockedFiles.isEmpty()); + } + + private Tuple, Set> testGetPinnedTimestampLockedFilesWithPinnedTimestamps( + List metadataFileTimestamps, + Set pinnedTimetamps, + Map metadataFilePinnedTimestampCache + ) { + String metadataPrefix = "metadata__1__2__3__4__5__"; + Map metadataFiles = new HashMap<>(); + for (Long metadataFileTimestamp : metadataFileTimestamps) { + metadataFiles.put(metadataFileTimestamp, metadataPrefix + RemoteStoreUtils.invertLong(metadataFileTimestamp)); + } + return new Tuple<>( + metadataFiles, + RemoteStoreUtils.getPinnedTimestampLockedFiles( + new ArrayList<>(metadataFiles.values()), + pinnedTimetamps, + metadataFilePinnedTimestampCache, + RemoteSegmentStoreDirectory.MetadataFilenameUtils::getTimestamp, + RemoteSegmentStoreDirectory.MetadataFilenameUtils::getNodeIdByPrimaryTermAndGen + ) + ); + } + + private Tuple, Set> testGetPinnedTimestampLockedFilesWithPinnedTimestamps( + Map metadataFileTimestampsPrimaryTermMap, + Set pinnedTimetamps, + Map metadataFilePinnedTimestampCache + ) { + setupRemotePinnedTimestampFeature(true); + Map metadataFiles = new HashMap<>(); + for (Map.Entry metadataFileTimestampPrimaryTerm : metadataFileTimestampsPrimaryTermMap.entrySet()) { + String primaryTerm = RemoteStoreUtils.invertLong(metadataFileTimestampPrimaryTerm.getValue()); + String metadataPrefix = "metadata__" + primaryTerm + "__2__3__4__5__"; + long metadataFileTimestamp = metadataFileTimestampPrimaryTerm.getKey(); + metadataFiles.put(metadataFileTimestamp, metadataPrefix + RemoteStoreUtils.invertLong(metadataFileTimestamp)); + } + return new Tuple<>( + metadataFiles, + RemoteStoreUtils.getPinnedTimestampLockedFiles( + new ArrayList<>(metadataFiles.values()), + pinnedTimetamps, + metadataFilePinnedTimestampCache, + RemoteSegmentStoreDirectory.MetadataFilenameUtils::getTimestamp, + RemoteSegmentStoreDirectory.MetadataFilenameUtils::getNodeIdByPrimaryTermAndGen + ) + ); + } + + public void testGetPinnedTimestampLockedFilesWithPinnedTimestamps() { + setupRemotePinnedTimestampFeature(true); + + Map metadataFilePinnedTimestampCache = new HashMap<>(); + + // Pinned timestamps 800, 900 + // Metadata with timestamp 990 + // No metadata matches the timestamp + Tuple, Set> metadataAndLocks = testGetPinnedTimestampLockedFilesWithPinnedTimestamps( + List.of(990L), + Set.of(800L, 900L), + metadataFilePinnedTimestampCache + ); + Map metadataFiles = metadataAndLocks.v1(); + Set implicitLockedFiles = metadataAndLocks.v2(); + + assertEquals(0, implicitLockedFiles.size()); + assertEquals(0, metadataFilePinnedTimestampCache.size()); + + // Pinned timestamps 800, 900, 1000 + // Metadata with timestamp 990 + // Metadata timestamp 990 <= Pinned Timestamp 1000 + metadataFilePinnedTimestampCache = new HashMap<>(); + metadataAndLocks = testGetPinnedTimestampLockedFilesWithPinnedTimestamps( + List.of(990L), + Set.of(800L, 900L, 1000L), + metadataFilePinnedTimestampCache + ); + metadataFiles = metadataAndLocks.v1(); + implicitLockedFiles = metadataAndLocks.v2(); + + assertEquals(1, implicitLockedFiles.size()); + assertTrue(implicitLockedFiles.contains(metadataFiles.get(990L))); + // This is still 0 as we don't cache the latest metadata file as it can change (explained in the next test case) + assertEquals(0, metadataFilePinnedTimestampCache.size()); + + // Pinned timestamps 800, 900, 1000 + // Metadata with timestamp 990, 995 + // Metadata timestamp 995 <= Pinned Timestamp 1000 + metadataFilePinnedTimestampCache = new HashMap<>(); + metadataAndLocks = testGetPinnedTimestampLockedFilesWithPinnedTimestamps( + List.of(990L, 995L), + Set.of(800L, 900L, 1000L), + metadataFilePinnedTimestampCache + ); + metadataFiles = metadataAndLocks.v1(); + implicitLockedFiles = metadataAndLocks.v2(); + + assertEquals(1, implicitLockedFiles.size()); + assertTrue(implicitLockedFiles.contains(metadataFiles.get(995L))); + // This is still 0 as we don't cache the latest metadata file as it can change + assertEquals(0, metadataFilePinnedTimestampCache.size()); + + // Pinned timestamps 800, 900, 1000 + // Metadata with timestamp 990, 995, 1000 + // Metadata timestamp 1000 <= Pinned Timestamp 1000 + metadataFilePinnedTimestampCache = new HashMap<>(); + metadataAndLocks = testGetPinnedTimestampLockedFilesWithPinnedTimestamps( + List.of(990L, 995L, 1000L), + Set.of(800L, 900L, 1000L), + metadataFilePinnedTimestampCache + ); + metadataFiles = metadataAndLocks.v1(); + implicitLockedFiles = metadataAndLocks.v2(); + + assertEquals(1, implicitLockedFiles.size()); + assertTrue(implicitLockedFiles.contains(metadataFiles.get(1000L))); + // This is still 0 as we don't cache the latest metadata file as it can change + assertEquals(0, metadataFilePinnedTimestampCache.size()); + + // Pinned timestamps 800, 900, 1000, 2000 + // Metadata with timestamp 990, 995, 1000, 1001 + // Metadata timestamp 1000 <= Pinned Timestamp 1000 + // Metadata timestamp 1001 <= Pinned Timestamp 2000 + metadataFilePinnedTimestampCache = new HashMap<>(); + metadataAndLocks = testGetPinnedTimestampLockedFilesWithPinnedTimestamps( + List.of(990L, 995L, 1000L, 1001L), + Set.of(800L, 900L, 1000L, 2000L), + metadataFilePinnedTimestampCache + ); + metadataFiles = metadataAndLocks.v1(); + implicitLockedFiles = metadataAndLocks.v2(); + + assertEquals(2, implicitLockedFiles.size()); + assertTrue(implicitLockedFiles.contains(metadataFiles.get(1000L))); + assertTrue(implicitLockedFiles.contains(metadataFiles.get(1001L))); + // Now we cache all the matches except the last one. + assertEquals(1, metadataFilePinnedTimestampCache.size()); + assertEquals(metadataFiles.get(1000L), metadataFilePinnedTimestampCache.get(1000L)); + + // Pinned timestamps 800, 900, 1000, 2000, 3000, 4000, 5000 + // Metadata with timestamp 990, 995, 1000, 1001 + // Metadata timestamp 1000 <= Pinned Timestamp 1000 + // Metadata timestamp 1001 <= Pinned Timestamp 2000 + // Metadata timestamp 1001 <= Pinned Timestamp 3000 + // Metadata timestamp 1001 <= Pinned Timestamp 4000 + // Metadata timestamp 1001 <= Pinned Timestamp 5000 + metadataFilePinnedTimestampCache = new HashMap<>(); + metadataAndLocks = testGetPinnedTimestampLockedFilesWithPinnedTimestamps( + List.of(990L, 995L, 1000L, 1001L), + Set.of(800L, 900L, 1000L, 2000L, 3000L, 4000L, 5000L), + metadataFilePinnedTimestampCache + ); + metadataFiles = metadataAndLocks.v1(); + implicitLockedFiles = metadataAndLocks.v2(); + + assertEquals(2, implicitLockedFiles.size()); + assertTrue(implicitLockedFiles.contains(metadataFiles.get(1000L))); + assertTrue(implicitLockedFiles.contains(metadataFiles.get(1001L))); + // Now we cache all the matches except the last one. + assertEquals(1, metadataFilePinnedTimestampCache.size()); + assertEquals(metadataFiles.get(1000L), metadataFilePinnedTimestampCache.get(1000L)); + + // Pinned timestamps 800, 900, 1000, 2000, 3000, 4000, 5000 + // Metadata with timestamp 990, 995, 1000, 1001, 1900, 2300 + // Metadata timestamp 1000 <= Pinned Timestamp 1000 + // Metadata timestamp 1900 <= Pinned Timestamp 2000 + // Metadata timestamp 2300 <= Pinned Timestamp 3000 + // Metadata timestamp 2300 <= Pinned Timestamp 4000 + // Metadata timestamp 2300 <= Pinned Timestamp 5000 + metadataFilePinnedTimestampCache = new HashMap<>(); + metadataAndLocks = testGetPinnedTimestampLockedFilesWithPinnedTimestamps( + List.of(990L, 995L, 1000L, 1001L, 1900L, 2300L), + Set.of(800L, 900L, 1000L, 2000L, 3000L, 4000L, 5000L), + metadataFilePinnedTimestampCache + ); + metadataFiles = metadataAndLocks.v1(); + implicitLockedFiles = metadataAndLocks.v2(); + + assertEquals(3, implicitLockedFiles.size()); + assertTrue(implicitLockedFiles.contains(metadataFiles.get(1000L))); + assertTrue(implicitLockedFiles.contains(metadataFiles.get(1900L))); + assertTrue(implicitLockedFiles.contains(metadataFiles.get(2300L))); + // Now we cache all the matches except the last one. + assertEquals(2, metadataFilePinnedTimestampCache.size()); + assertEquals(metadataFiles.get(1000L), metadataFilePinnedTimestampCache.get(1000L)); + assertEquals(metadataFiles.get(1900L), metadataFilePinnedTimestampCache.get(2000L)); + + // Pinned timestamps 2000, 3000, 4000, 5000 + // Metadata with timestamp 990, 995, 1000, 1001, 1900, 2300 + // Metadata timestamp 1900 <= Pinned Timestamp 2000 + // Metadata timestamp 2300 <= Pinned Timestamp 3000 + // Metadata timestamp 2300 <= Pinned Timestamp 4000 + // Metadata timestamp 2300 <= Pinned Timestamp 5000 + metadataFilePinnedTimestampCache = new HashMap<>(); + metadataAndLocks = testGetPinnedTimestampLockedFilesWithPinnedTimestamps( + List.of(990L, 995L, 1000L, 1001L, 1900L, 2300L), + Set.of(2000L, 3000L, 4000L, 5000L), + metadataFilePinnedTimestampCache + ); + metadataFiles = metadataAndLocks.v1(); + implicitLockedFiles = metadataAndLocks.v2(); + + assertEquals(2, implicitLockedFiles.size()); + assertTrue(implicitLockedFiles.contains(metadataFiles.get(1900L))); + assertTrue(implicitLockedFiles.contains(metadataFiles.get(2300L))); + // Now we cache all the matches except the last one. + assertEquals(1, metadataFilePinnedTimestampCache.size()); + assertEquals(metadataFiles.get(1900L), metadataFilePinnedTimestampCache.get(2000L)); + + // Pinned timestamps 2000, 3000, 4000, 5000 + // Metadata with timestamp 1001, 1900, 2300, 3000, 3001, 5500, 6000 + // Metadata timestamp 1900 <= Pinned Timestamp 2000 + // Metadata timestamp 3000 <= Pinned Timestamp 3000 + // Metadata timestamp 3001 <= Pinned Timestamp 4000 + // Metadata timestamp 3001 <= Pinned Timestamp 5000 + metadataFilePinnedTimestampCache = new HashMap<>(); + metadataAndLocks = testGetPinnedTimestampLockedFilesWithPinnedTimestamps( + List.of(1001L, 1900L, 2300L, 3000L, 3001L, 5500L, 6000L), + Set.of(2000L, 3000L, 4000L, 5000L), + metadataFilePinnedTimestampCache + ); + metadataFiles = metadataAndLocks.v1(); + implicitLockedFiles = metadataAndLocks.v2(); + + assertEquals(3, implicitLockedFiles.size()); + assertTrue(implicitLockedFiles.contains(metadataFiles.get(1900L))); + assertTrue(implicitLockedFiles.contains(metadataFiles.get(3000L))); + assertTrue(implicitLockedFiles.contains(metadataFiles.get(3001L))); + // Now we cache all the matches except the last one. + assertEquals(4, metadataFilePinnedTimestampCache.size()); + assertEquals(metadataFiles.get(1900L), metadataFilePinnedTimestampCache.get(2000L)); + assertEquals(metadataFiles.get(3000L), metadataFilePinnedTimestampCache.get(3000L)); + assertEquals(metadataFiles.get(3001L), metadataFilePinnedTimestampCache.get(4000L)); + assertEquals(metadataFiles.get(3001L), metadataFilePinnedTimestampCache.get(5000L)); + + // Pinned timestamps 4000, 5000, 6000, 7000 + // Metadata with timestamp 2300, 3000, 3001, 5500, 6000 + // Metadata timestamp 3001 <= Pinned Timestamp 4000 + // Metadata timestamp 3001 <= Pinned Timestamp 5000 + // Metadata timestamp 6000 <= Pinned Timestamp 6000 + // Metadata timestamp 6000 <= Pinned Timestamp 7000 + metadataFilePinnedTimestampCache = new HashMap<>(); + metadataAndLocks = testGetPinnedTimestampLockedFilesWithPinnedTimestamps( + List.of(2300L, 3000L, 3001L, 5500L, 6000L), + Set.of(4000L, 5000L, 6000L, 7000L), + metadataFilePinnedTimestampCache + ); + metadataFiles = metadataAndLocks.v1(); + implicitLockedFiles = metadataAndLocks.v2(); + + assertEquals(2, implicitLockedFiles.size()); + assertTrue(implicitLockedFiles.contains(metadataFiles.get(3001L))); + assertTrue(implicitLockedFiles.contains(metadataFiles.get(6000L))); + // Now we cache all the matches except the last one. + assertEquals(2, metadataFilePinnedTimestampCache.size()); + assertEquals(metadataFiles.get(3001L), metadataFilePinnedTimestampCache.get(4000L)); + assertEquals(metadataFiles.get(3001L), metadataFilePinnedTimestampCache.get(5000L)); + } + + public void testGetPinnedTimestampLockedFilesWithPinnedTimestampsDifferentPrefix() { + setupRemotePinnedTimestampFeature(true); + + Map metadataFilePinnedTimestampCache = new HashMap<>(); + + // Pinned timestamp 7000 + // Primary Term - Timestamp in md file + // 6 - 7002 + // 6 - 6998 + // 5 - 6995 + // 5 - 6990 + Tuple, Set> metadataAndLocks = testGetPinnedTimestampLockedFilesWithPinnedTimestamps( + Map.of(7002L, 6L, 6998L, 6L, 6995L, 5L, 6990L, 5L), + Set.of(4000L, 5000L, 6000L, 7000L), + metadataFilePinnedTimestampCache + ); + Map metadataFiles = metadataAndLocks.v1(); + Set implicitLockedFiles = metadataAndLocks.v2(); + + assertEquals(1, implicitLockedFiles.size()); + assertTrue(implicitLockedFiles.contains(metadataFiles.get(6998L))); + // Now we cache all the matches except the last one. + assertEquals(1, metadataFilePinnedTimestampCache.size()); + assertEquals(metadataFiles.get(6998L), metadataFilePinnedTimestampCache.get(7000L)); + + // Pinned timestamp 7000 + // Primary Term - Timestamp in md file + // 6 - 7002 + // 5 - 6998 + // 5 - 6995 + // 5 - 6990 + metadataFilePinnedTimestampCache = new HashMap<>(); + metadataAndLocks = testGetPinnedTimestampLockedFilesWithPinnedTimestamps( + Map.of(7002L, 6L, 6998L, 5L, 6995L, 5L, 6990L, 5L), + Set.of(4000L, 5000L, 6000L, 7000L), + metadataFilePinnedTimestampCache + ); + metadataFiles = metadataAndLocks.v1(); + implicitLockedFiles = metadataAndLocks.v2(); + + assertEquals(1, implicitLockedFiles.size()); + assertTrue(implicitLockedFiles.contains(metadataFiles.get(6998L))); + // Now we cache all the matches except the last one. + assertEquals(1, metadataFilePinnedTimestampCache.size()); + assertEquals(metadataFiles.get(6998L), metadataFilePinnedTimestampCache.get(7000L)); + + // Pinned timestamp 7000 + // Primary Term - Timestamp in md file + // 6 - 7002 + // 6 - 6998 + // 5 - 7001 + // 5 - 6990 + metadataFilePinnedTimestampCache = new HashMap<>(); + metadataAndLocks = testGetPinnedTimestampLockedFilesWithPinnedTimestamps( + Map.of(7002L, 6L, 6998L, 6L, 7001L, 5L, 6990L, 5L), + Set.of(4000L, 5000L, 6000L, 7000L), + metadataFilePinnedTimestampCache + ); + metadataFiles = metadataAndLocks.v1(); + implicitLockedFiles = metadataAndLocks.v2(); + + assertEquals(1, implicitLockedFiles.size()); + assertTrue(implicitLockedFiles.contains(metadataFiles.get(6998L))); + // Now we cache all the matches except the last one. + assertEquals(1, metadataFilePinnedTimestampCache.size()); + assertEquals(metadataFiles.get(6998L), metadataFilePinnedTimestampCache.get(7000L)); + + // Pinned timestamp 7000 + // Primary Term - Timestamp in md file + // 6 - 7002 + // 5 - 7005 + // 5 - 6990 + metadataFilePinnedTimestampCache = new HashMap<>(); + metadataAndLocks = testGetPinnedTimestampLockedFilesWithPinnedTimestamps( + Map.of(7002L, 6L, 7005L, 5L, 6990L, 5L), + Set.of(4000L, 5000L, 6000L, 7000L), + metadataFilePinnedTimestampCache + ); + metadataFiles = metadataAndLocks.v1(); + implicitLockedFiles = metadataAndLocks.v2(); + + assertEquals(1, implicitLockedFiles.size()); + assertTrue(implicitLockedFiles.contains(metadataFiles.get(6990L))); + // Now we cache all the matches except the last one. + assertEquals(1, metadataFilePinnedTimestampCache.size()); + assertEquals(metadataFiles.get(6990L), metadataFilePinnedTimestampCache.get(7000L)); + + // Pinned timestamp 7000 + // Primary Term - Timestamp in md file + // 6 - 6999 + // 5 - 7005 + // 5 - 6990 + metadataFilePinnedTimestampCache = new HashMap<>(); + metadataAndLocks = testGetPinnedTimestampLockedFilesWithPinnedTimestamps( + Map.of(6999L, 6L, 7005L, 5L, 6990L, 5L), + Set.of(4000L, 5000L, 6000L, 7000L), + metadataFilePinnedTimestampCache + ); + metadataFiles = metadataAndLocks.v1(); + implicitLockedFiles = metadataAndLocks.v2(); + + assertEquals(1, implicitLockedFiles.size()); + assertTrue(implicitLockedFiles.contains(metadataFiles.get(6999L))); + // Now we cache all the matches except the last one. + assertEquals(0, metadataFilePinnedTimestampCache.size()); + } + + public void testFilterOutMetadataFilesBasedOnAgeFeatureDisabled() { + setupRemotePinnedTimestampFeature(false); + List metadataFiles = new ArrayList<>(); + + for (int i = 0; i < randomIntBetween(5, 10); i++) { + metadataFiles.add((System.currentTimeMillis() - randomIntBetween(-150000, 150000)) + "_file" + i + ".txt"); + } + + List result = RemoteStoreUtils.filterOutMetadataFilesBasedOnAge( + metadataFiles, + file -> Long.valueOf(file.split("_")[0]), + System.currentTimeMillis() + ); + assertEquals(metadataFiles, result); + } + + public void testFilterOutMetadataFilesBasedOnAge_AllFilesOldEnough() { + setupRemotePinnedTimestampFeature(true); + + List metadataFiles = Arrays.asList( + (System.currentTimeMillis() - 150000) + "_file1.txt", + (System.currentTimeMillis() - 300000) + "_file2.txt", + (System.currentTimeMillis() - 450000) + "_file3.txt" + ); + + List result = RemoteStoreUtils.filterOutMetadataFilesBasedOnAge( + metadataFiles, + file -> Long.valueOf(file.split("_")[0]), + System.currentTimeMillis() + ); + assertEquals(metadataFiles, result); + } + + public void testFilterOutMetadataFilesBasedOnAge_SomeFilesTooNew() { + setupRemotePinnedTimestampFeature(true); + + String file1 = (System.currentTimeMillis() - 150000) + "_file1.txt"; + String file2 = (System.currentTimeMillis() - 300000) + "_file2.txt"; + String file3 = (System.currentTimeMillis() + 450000) + "_file3.txt"; + + List metadataFiles = Arrays.asList(file1, file2, file3); + + List result = RemoteStoreUtils.filterOutMetadataFilesBasedOnAge( + metadataFiles, + file -> Long.valueOf(file.split("_")[0]), + System.currentTimeMillis() + ); + List expected = Arrays.asList(file1, file2); + assertEquals(expected, result); + } + + public void testFilterOutMetadataFilesBasedOnAge_AllFilesTooNew() { + setupRemotePinnedTimestampFeature(true); + + String file1 = (System.currentTimeMillis() + 150000) + "_file1.txt"; + String file2 = (System.currentTimeMillis() + 300000) + "_file2.txt"; + String file3 = (System.currentTimeMillis() + 450000) + "_file3.txt"; + + List metadataFiles = Arrays.asList(file1, file2, file3); + + List result = RemoteStoreUtils.filterOutMetadataFilesBasedOnAge( + metadataFiles, + file -> Long.valueOf(file.split("_")[0]), + System.currentTimeMillis() + ); + assertTrue(result.isEmpty()); + } + + public void testFilterOutMetadataFilesBasedOnAge_EmptyInputList() { + setupRemotePinnedTimestampFeature(true); + + List metadataFiles = Arrays.asList(); + + List result = RemoteStoreUtils.filterOutMetadataFilesBasedOnAge( + metadataFiles, + file -> Long.valueOf(file.split("_")[0]), + System.currentTimeMillis() + ); + assertTrue(result.isEmpty()); + } + + public void testIsPinnedTimestampStateStaleFeatureDisabled() { + setupRemotePinnedTimestampFeature(false); + assertFalse(RemoteStoreUtils.isPinnedTimestampStateStale()); + } + + public void testIsPinnedTimestampStateStaleFeatureEnabled() { + setupRemotePinnedTimestampFeature(true); + assertTrue(RemoteStoreUtils.isPinnedTimestampStateStale()); + } + }