diff --git a/CHANGELOG.md b/CHANGELOG.md
index 13ab5535d364b..ee78241044ff7 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -129,6 +129,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
 - Add instrumentation for indexing in transport bulk action and transport shard bulk action. ([#10273](https://github.com/opensearch-project/OpenSearch/pull/10273))
 - [BUG] Disable sort optimization for HALF_FLOAT ([#10999](https://github.com/opensearch-project/OpenSearch/pull/10999))
 - Refactor common parts from the Rounding class into a separate 'round' package ([#11023](https://github.com/opensearch-project/OpenSearch/issues/11023))
+- Add vectorized (SIMD) implementation of B-tree to round down dates ([#11194](https://github.com/opensearch-project/OpenSearch/issues/11194))
 - Performance improvement for MultiTerm Queries on Keyword fields ([#7057](https://github.com/opensearch-project/OpenSearch/issues/7057))
 - Disable concurrent aggs for Diversified Sampler and Sampler aggs ([#11087](https://github.com/opensearch-project/OpenSearch/issues/11087))
 - Made leader/follower check timeout setting dynamic ([#10528](https://github.com/opensearch-project/OpenSearch/pull/10528))
diff --git a/libs/common/build.gradle b/libs/common/build.gradle
index 973fe30d09842..0f2b24054b4a5 100644
--- a/libs/common/build.gradle
+++ b/libs/common/build.gradle
@@ -43,3 +43,39 @@ tasks.named('forbiddenApisMain').configure {
   // TODO: Need to decide how we want to handle for forbidden signatures with the changes to server
   replaceSignatureFiles 'jdk-signatures'
 }
+
+sourceSets {
+  java20 {
+    java {
+      srcDirs = ['src/main/java20']
+    }
+  }
+}
+
+configurations {
+  java20Implementation.extendsFrom(implementation)
+}
+
+dependencies {
+  java20Implementation sourceSets.main.output
+}
+
+compileJava20Java {
+  sourceCompatibility = JavaVersion.VERSION_20
+  targetCompatibility = JavaVersion.VERSION_20
+  options.compilerArgs += ['--add-modules', 'jdk.incubator.vector']
+  options.compilerArgs -= '-Werror' // use of incubator modules is reported as a warning
+}
+
+forbiddenApisJava20 {
+  failOnMissingClasses = false
+  ignoreSignaturesOfMissingClasses = true
+}
+
+jar {
+  metaInf {
+    into 'versions/20'
+    from sourceSets.java20.output
+  }
+  manifest.attributes('Multi-Release': 'true')
+}
diff --git a/libs/common/src/main/java/org/opensearch/common/round/RoundableFactory.java b/libs/common/src/main/java/org/opensearch/common/round/RoundableFactory.java
index b7422694c3013..0ca1ae33d24bd 100644
--- a/libs/common/src/main/java/org/opensearch/common/round/RoundableFactory.java
+++ b/libs/common/src/main/java/org/opensearch/common/round/RoundableFactory.java
@@ -29,7 +29,7 @@ private RoundableFactory() {}
     /**
      * Creates and returns the fastest implementation of {@link Roundable}.
      */
-    public static Roundable create(long[] values, int size) {
+    public static Roundable create(long[] values, int size, boolean useSimdIfAvailable) {
         if (size <= LINEAR_SEARCH_MAX_SIZE) {
             return new BidirectionalLinearSearcher(values, size);
         } else {
diff --git a/libs/common/src/main/java20/org/opensearch/common/round/BtreeSearcher.java b/libs/common/src/main/java20/org/opensearch/common/round/BtreeSearcher.java
new file mode 100644
index 0000000000000..909d66d7392a0
--- /dev/null
+++ b/libs/common/src/main/java20/org/opensearch/common/round/BtreeSearcher.java
@@ -0,0 +1,110 @@
+/*
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * The OpenSearch Contributors require contributions made to
+ * this file be licensed under the Apache-2.0 license or a
+ * compatible open source license.
+ */
+
+package org.opensearch.common.round;
+
+import org.opensearch.common.annotation.InternalApi;
+
+import jdk.incubator.vector.LongVector;
+import jdk.incubator.vector.Vector;
+import jdk.incubator.vector.VectorOperators;
+import jdk.incubator.vector.VectorSpecies;
+
+/**
+ * It uses vectorized B-tree search to find the round-down point.
+ *
+ * @opensearch.internal
+ */
+@InternalApi
+class BtreeSearcher implements Roundable {
+    private final VectorSpecies<Long> species;
+    private final int lanes;
+    private final int shift;
+    private final long[] values;
+
+    /**
+     * Copies the first {@code size} entries of {@code values} into the B-tree memory layout.
+     *
+     * @param values is the sorted input array; it may be longer than {@code size}
+     * @param size is the number of leading entries of {@code values} to use
+     * @param species is the vector species; its lane count must be a positive power of 2
+     */
+    BtreeSearcher(long[] values, int size, VectorSpecies<Long> species) {
+        assert size > 0 : "at least one value must be present";
+
+        this.species = species;
+        this.lanes = species.length();
+        this.shift = log2(lanes);
+
+        int blocks = (size + lanes - 1) / lanes; // number of blocks
+        int length = 1 + blocks * lanes; // size of the backing array (1-indexed)
+
+        this.values = new long[length];
+        build(values, 0, size, this.values, 1);
+    }
+
+    /**
+     * Builds the B-tree memory layout.
+     *
+     * <p>
+     * Each block stores 'lanes' values at indices {@code i, i + 1, ..., i + lanes - 1} where {@code i} is the
+     * starting offset. The starting offset of the root block is 1. The branching factor is (1 + lanes) so each
+     * block can have these many children. Given the starting offset {@code i} of a block, the starting offset
+     * of its k-th child (ranging from {@code 0, 1, ..., lanes}) can be computed as {@code i + ((i + k) << shift)}.
+     *
+     * @param src is the sorted input array
+     * @param i is the index in the input array to read the value from
+     * @param size is the number of leading values in the input array to copy
+     * @param dst is the output array
+     * @param j is the index in the output array to write the value to
+     * @return the next index 'i'
+     */
+    private int build(long[] src, int i, int size, long[] dst, int j) {
+        if (j < dst.length) {
+            for (int k = 0; k < lanes; k++) {
+                i = build(src, i, size, dst, j + ((j + k) << shift));
+                // Slots 1..size (1-indexed) take the input values and the rest are padded with a sentinel.
+                // The bound must be 'size' and not 'src.length' because the input array may be over-allocated.
+                dst[j + k] = (j + k <= size) ? src[i++] : Long.MAX_VALUE;
+            }
+            i = build(src, i, size, dst, j + ((j + lanes) << shift));
+        }
+        return i;
+    }
+
+    /**
+     * Returns the round-down point of the key, i.e., the largest stored value less than or equal to the key.
+     */
+    @Override
+    public long floor(long key) {
+        Vector<Long> keyVector = LongVector.broadcast(species, key);
+        int i = 1, result = 1;
+
+        while (i < values.length) {
+            Vector<Long> valuesVector = LongVector.fromArray(species, values, i);
+            int j = i + valuesVector.compare(VectorOperators.GT, keyVector).firstTrue();
+            result = (j > i) ? j : result;
+            i += (j << shift);
+        }
+
+        assert result > 1 : "key must be greater than or equal to " + values[1];
+        return values[result - 1];
+    }
+
+    private static int log2(int n) {
+        assert (n > 0) && ((n & (n - 1)) == 0) : n + " is not a positive power of 2";
+
+        int result = 0;
+        while (n > 1) {
+            n >>>= 1;
+            result += 1;
+        }
+
+        return result;
+    }
+}
diff --git a/libs/common/src/main/java20/org/opensearch/common/round/RoundableFactory.java b/libs/common/src/main/java20/org/opensearch/common/round/RoundableFactory.java
new file mode 100644
index 0000000000000..09d0299edac9f
--- /dev/null
+++ b/libs/common/src/main/java20/org/opensearch/common/round/RoundableFactory.java
@@ -0,0 +1,55 @@
+/*
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * The OpenSearch Contributors require contributions made to
+ * this file be licensed under the Apache-2.0 license or a
+ * compatible open source license.
+ */
+
+package org.opensearch.common.round;
+
+import org.opensearch.common.annotation.InternalApi;
+
+import jdk.incubator.vector.LongVector;
+import jdk.incubator.vector.VectorSpecies;
+
+/**
+ * Factory class to create and return the fastest implementation of {@link Roundable}.
+ *
+ * @opensearch.internal
+ */
+@InternalApi
+public final class RoundableFactory {
+    /**
+     * The maximum limit up to which linear search is used, otherwise binary search is used.
+     * This is because linear search is much faster on small arrays.
+     * Benchmark results: PR #9727
+     */
+    private static final int LINEAR_SEARCH_MAX_SIZE = 64;
+
+    /**
+     * The preferred LongVector species with the maximal bit-size supported on this platform.
+     */
+    private static final VectorSpecies<Long> LONG_VECTOR_SPECIES = LongVector.SPECIES_PREFERRED;
+
+    /**
+     * Indicates whether the vectorized (SIMD) B-tree search implementation is supported.
+     * This is true when the platform has a minimum of 4 long vector lanes.
+     */
+    private static final boolean IS_BTREE_SEARCH_SUPPORTED = LONG_VECTOR_SPECIES.length() >= 4;
+
+    private RoundableFactory() {}
+
+    /**
+     * Creates and returns the fastest implementation of {@link Roundable}.
+     */
+    public static Roundable create(long[] values, int size, boolean useSimdIfAvailable) {
+        if (size <= LINEAR_SEARCH_MAX_SIZE) {
+            return new BidirectionalLinearSearcher(values, size);
+        } else if (IS_BTREE_SEARCH_SUPPORTED && useSimdIfAvailable) {
+            return new BtreeSearcher(values, size, LONG_VECTOR_SPECIES);
+        } else {
+            return new BinarySearcher(values, size);
+        }
+    }
+}
diff --git a/server/src/main/java/org/opensearch/common/Rounding.java b/server/src/main/java/org/opensearch/common/Rounding.java
index 061934f9722f5..d9f6189d2ee09 100644
--- a/server/src/main/java/org/opensearch/common/Rounding.java
+++ b/server/src/main/java/org/opensearch/common/Rounding.java
@@ -41,6 +41,7 @@
 import org.opensearch.common.round.RoundableFactory;
 import org.opensearch.common.time.DateUtils;
 import org.opensearch.common.unit.TimeValue;
+import org.opensearch.common.util.FeatureFlags;
 import org.opensearch.core.common.io.stream.StreamInput;
 import org.opensearch.core.common.io.stream.StreamOutput;
 import org.opensearch.core.common.io.stream.Writeable;
@@ -81,6 +82,8 @@
 public abstract class Rounding implements Writeable {
     private static final Logger logger = LogManager.getLogger(Rounding.class);
 
+    private static final boolean IS_SIMD_ROUNDING_ENABLED = FeatureFlags.isEnabled(FeatureFlags.SIMD_ROUNDING);
+
     /**
      * A Date Time Unit
      *
@@ -444,7 +447,7 @@ protected Prepared maybeUseArray(long minUtcMillis, long maxUtcMillis, int max)
                 values = ArrayUtil.grow(values, i + 1);
                 values[i++] = rounded;
             }
-            return new ArrayRounding(RoundableFactory.create(values, i), this);
+            return new ArrayRounding(RoundableFactory.create(values, i, IS_SIMD_ROUNDING_ENABLED), this);
         }
     }
 
diff --git a/server/src/main/java/org/opensearch/common/util/FeatureFlags.java b/server/src/main/java/org/opensearch/common/util/FeatureFlags.java
index 4e9b417e3433b..bae8c93bd6185 100644
--- a/server/src/main/java/org/opensearch/common/util/FeatureFlags.java
+++ b/server/src/main/java/org/opensearch/common/util/FeatureFlags.java
@@ -60,6 +60,12 @@ public class FeatureFlags {
      */
     public static final String DATETIME_FORMATTER_CACHING = "opensearch.experimental.optimization.datetime_formatter_caching.enabled";
 
+    /**
+     * Gates the usage of SIMD for rounding down numbers.
+     * This is used extensively to round down timestamps in the date_histogram aggregation.
+     */
+    public static final String SIMD_ROUNDING = "opensearch.experimental.simd.rounding.enabled";
+
     /**
      * Should store the settings from opensearch.yml.
      */
@@ -122,4 +128,6 @@ public static boolean isEnabled(Setting featureFlag) {
         true,
         Property.NodeScope
     );
+
+    public static final Setting<Boolean> SIMD_ROUNDING_SETTING = Setting.boolSetting(SIMD_ROUNDING, false, Property.NodeScope);
 }