Skip to content

Commit

Permalink
Add vectorized (SIMD) implementation of B-tree to round down dates
Browse files Browse the repository at this point in the history
Signed-off-by: Ketan Verma <[email protected]>
  • Loading branch information
ketanv3 committed Nov 14, 2023
1 parent 4deac42 commit 923022b
Show file tree
Hide file tree
Showing 7 changed files with 202 additions and 2 deletions.
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -129,6 +129,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
- Add instrumentation for indexing in transport bulk action and transport shard bulk action. ([#10273](https://github.com/opensearch-project/OpenSearch/pull/10273))
- [BUG] Disable sort optimization for HALF_FLOAT ([#10999](https://github.com/opensearch-project/OpenSearch/pull/10999))
- Refactor common parts from the Rounding class into a separate 'round' package ([#11023](https://github.com/opensearch-project/OpenSearch/issues/11023))
- Add vectorized (SIMD) implementation of B-tree to round down dates ([#11194](https://github.com/opensearch-project/OpenSearch/issues/11194))
- Performance improvement for MultiTerm Queries on Keyword fields ([#7057](https://github.com/opensearch-project/OpenSearch/issues/7057))
- Disable concurrent aggs for Diversified Sampler and Sampler aggs ([#11087](https://github.com/opensearch-project/OpenSearch/issues/11087))
- Made leader/follower check timeout setting dynamic ([#10528](https://github.com/opensearch-project/OpenSearch/pull/10528))
Expand Down
36 changes: 36 additions & 0 deletions libs/common/build.gradle
Original file line number Diff line number Diff line change
Expand Up @@ -43,3 +43,39 @@ tasks.named('forbiddenApisMain').configure {
// TODO: Need to decide how we want to handle for forbidden signatures with the changes to server
replaceSignatureFiles 'jdk-signatures'
}

// Declares an additional 'java20' source set whose Java sources live in src/main/java20.
sourceSets {
java20 {
java {
srcDirs = ['src/main/java20']
}
}
}

// The java20 source set inherits every dependency declared on the main 'implementation' configuration.
configurations {
java20Implementation.extendsFrom(implementation)
}

// Classes in the java20 source set compile against the compiled output of the main source set.
dependencies {
java20Implementation sourceSets.main.output
}

// Compiles the java20 source set at Java 20 source/target level with the incubating Vector API module enabled.
compileJava20Java {
sourceCompatibility = JavaVersion.VERSION_20
targetCompatibility = JavaVersion.VERSION_20
options.compilerArgs += ['--add-modules', 'jdk.incubator.vector']
options.compilerArgs -= '-Werror' // use of incubator modules is reported as a warning
}

// Relaxes the forbidden-APIs check for the java20 source set so that unresolved classes do not fail the build.
// NOTE(review): presumably needed because the incubator module classes are not visible to the checker — confirm.
forbiddenApisJava20 {
failOnMissingClasses = false
ignoreSignaturesOfMissingClasses = true
}

// Packages the java20 output under META-INF/versions/20 and marks the jar as a multi-release jar.
jar {
metaInf {
into 'versions/20'
from sourceSets.java20.output
}
manifest.attributes('Multi-Release': 'true')
}
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@ private RoundableFactory() {}
/**
* Creates and returns the fastest implementation of {@link Roundable}.
*/
public static Roundable create(long[] values, int size) {
public static Roundable create(long[] values, int size, boolean useSimdIfAvailable) {
if (size <= LINEAR_SEARCH_MAX_SIZE) {
return new BidirectionalLinearSearcher(values, size);
} else {
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,97 @@
/*
* SPDX-License-Identifier: Apache-2.0
*
* The OpenSearch Contributors require contributions made to
* this file be licensed under the Apache-2.0 license or a
* compatible open source license.
*/

package org.opensearch.common.round;

import org.opensearch.common.annotation.InternalApi;

import jdk.incubator.vector.LongVector;
import jdk.incubator.vector.Vector;
import jdk.incubator.vector.VectorOperators;
import jdk.incubator.vector.VectorSpecies;

/**
 * Finds the round-down point (the greatest stored value that is less than or equal to a key)
 * using a vectorized (SIMD) search over an implicit B-tree layout.
 *
 * <p>
 * The sorted input is rearranged once, at construction time, into blocks of 'lanes' values so that
 * each step of the search compares the key against a whole block with a single vector comparison.
 *
 * @opensearch.internal
 */
@InternalApi
class BtreeSearcher implements Roundable {
    private final VectorSpecies<Long> species;
    private final int lanes;
    private final int shift;
    private final int size;
    private final long[] values;

    /**
     * Builds the searcher from the first {@code size} entries of {@code values}.
     *
     * @param values  array whose first {@code size} entries are sorted in ascending order; it may be
     *                larger than {@code size}, in which case the remaining entries are ignored
     * @param size    number of valid entries in {@code values}; must be positive
     * @param species vector species to search with; its lane count must be a power of two
     */
    BtreeSearcher(long[] values, int size, VectorSpecies<Long> species) {
        assert size > 0 : "at least one value must be present";
        assert size <= values.length : "size must not exceed the length of the input array";

        this.species = species;
        this.lanes = species.length();
        this.shift = log2(lanes);
        this.size = size;

        int blocks = (size + lanes - 1) / lanes; // number of blocks
        int length = 1 + blocks * lanes; // size of the backing array (1-indexed)

        this.values = new long[length];
        build(values, 0, size, this.values, 1);
    }

    /**
     * Builds the B-tree memory layout.
     *
     * <p>
     * Each block stores 'lanes' values at indices {@code i, i + 1, ..., i + lanes - 1} where {@code i} is the
     * starting offset. The starting offset of the root block is 1. The branching factor is (1 + lanes) so each
     * block can have these many children. Given the starting offset {@code i} of a block, the starting offset
     * of its k-th child (ranging from {@code 0, 1, ..., lanes}) can be computed as {@code i + ((i + k) << shift)}.
     *
     * <p>
     * The tree is filled as a complete tree: slots {@code 1..size} of the output receive the input values
     * (assigned by an in-order traversal so that every block is sorted), and all remaining slots are padded
     * with {@link Long#MAX_VALUE}.
     *
     * @param src  is the sorted input array
     * @param i    is the index in the input array to read the value from
     * @param size is the number of valid values in the input array
     * @param dst  is the output array
     * @param j    is the index in the output array to write the value to
     * @return the next index 'i'
     */
    private int build(long[] src, int i, int size, long[] dst, int j) {
        if (j < dst.length) {
            for (int k = 0; k < lanes; k++) {
                // Everything in the k-th child subtree precedes the k-th value of this block.
                i = build(src, i, size, dst, j + ((j + k) << shift));
                // Only the logical size decides whether a slot holds a real value; the input array
                // may be over-allocated, so its length must not be used here.
                dst[j + k] = (j + k <= size) ? src[i++] : Long.MAX_VALUE;
            }
            // The last child subtree holds everything greater than all values of this block.
            i = build(src, i, size, dst, j + ((j + lanes) << shift));
        }
        return i;
    }

    /**
     * Returns the greatest stored value that is less than or equal to {@code key}.
     *
     * @param key the value to round down; it must not be smaller than the smallest stored value
     * @return the round-down point for {@code key}
     */
    @Override
    public long floor(long key) {
        Vector<Long> keyVector = LongVector.broadcast(species, key);
        int i = 1, result = 1;

        while (i < values.length) {
            Vector<Long> valuesVector = LongVector.fromArray(species, values, i);
            // firstTrue() yields the number of leading values in this block that are <= key
            // (it equals the lane count when no value in the block is greater than the key).
            int j = i + valuesVector.compare(VectorOperators.GT, keyVector).firstTrue();
            // Remember the slot just past the last value <= key seen so far.
            result = (j > i) ? j : result;
            // Descend into the child that sits between slots (j - 1) and j.
            i += (j << shift);
        }

        // Padding slots hold Long.MAX_VALUE, which is not "greater than" a key of Long.MAX_VALUE;
        // clamp so that a padding slot is never returned. This is a no-op for every other key.
        result = Math.min(result, size + 1);

        assert result > 1 : "key must be greater than or equal to " + values[1];
        return values[result - 1];
    }

    /**
     * Returns the base-2 logarithm of {@code n}, which must be a positive power of two.
     */
    private static int log2(int n) {
        assert (n > 0) && ((n & (n - 1)) == 0) : n + " is not a positive power of 2";

        int result = 0;
        while (n > 1) {
            n >>>= 1;
            result += 1;
        }

        return result;
    }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,55 @@
/*
* SPDX-License-Identifier: Apache-2.0
*
* The OpenSearch Contributors require contributions made to
* this file be licensed under the Apache-2.0 license or a
* compatible open source license.
*/

package org.opensearch.common.round;

import org.opensearch.common.annotation.InternalApi;

import jdk.incubator.vector.LongVector;
import jdk.incubator.vector.VectorSpecies;

/**
 * Static factory that selects the fastest available {@link Roundable} implementation
 * for a given input size and platform.
 *
 * @opensearch.internal
 */
@InternalApi
public final class RoundableFactory {
    /**
     * Largest input size for which linear search is chosen; linear search is much faster on small arrays.
     * Benchmark results: <a href="https://github.com/opensearch-project/OpenSearch/pull/9727">PR #9727</a>
     */
    private static final int LINEAR_SEARCH_MAX_SIZE = 64;

    /**
     * The platform's preferred species for long vectors.
     */
    private static final VectorSpecies<Long> LONG_VECTOR_SPECIES = LongVector.SPECIES_PREFERRED;

    /**
     * Whether the vectorized (SIMD) B-tree search can be used on this platform,
     * which requires the preferred species to provide at least 4 long lanes.
     */
    private static final boolean IS_BTREE_SEARCH_SUPPORTED = LONG_VECTOR_SPECIES.length() >= 4;

    /** Not instantiable; this class only exposes static factory methods. */
    private RoundableFactory() {}

    /**
     * Creates and returns the fastest implementation of {@link Roundable}.
     *
     * @param values             the array holding the values to search
     * @param size               the number of values to use from {@code values}
     * @param useSimdIfAvailable whether the vectorized implementation may be used when the platform supports it
     * @return a linear searcher for small inputs, otherwise a vectorized B-tree searcher when it is both
     *         requested and supported, otherwise a binary searcher
     */
    public static Roundable create(long[] values, int size, boolean useSimdIfAvailable) {
        // Small inputs always use linear search.
        if (size <= LINEAR_SEARCH_MAX_SIZE) {
            return new BidirectionalLinearSearcher(values, size);
        }

        // Larger inputs use the vectorized B-tree only when it is both requested and supported.
        final boolean vectorized = useSimdIfAvailable && IS_BTREE_SEARCH_SUPPORTED;
        if (vectorized) {
            return new BtreeSearcher(values, size, LONG_VECTOR_SPECIES);
        }

        return new BinarySearcher(values, size);
    }
}
5 changes: 4 additions & 1 deletion server/src/main/java/org/opensearch/common/Rounding.java
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,7 @@
import org.opensearch.common.round.RoundableFactory;
import org.opensearch.common.time.DateUtils;
import org.opensearch.common.unit.TimeValue;
import org.opensearch.common.util.FeatureFlags;
import org.opensearch.core.common.io.stream.StreamInput;
import org.opensearch.core.common.io.stream.StreamOutput;
import org.opensearch.core.common.io.stream.Writeable;
Expand Down Expand Up @@ -81,6 +82,8 @@
public abstract class Rounding implements Writeable {
private static final Logger logger = LogManager.getLogger(Rounding.class);

private static final boolean IS_SIMD_ROUNDING_ENABLED = FeatureFlags.isEnabled(FeatureFlags.SIMD_ROUNDING);

/**
* A Date Time Unit
*
Expand Down Expand Up @@ -444,7 +447,7 @@ protected Prepared maybeUseArray(long minUtcMillis, long maxUtcMillis, int max)
values = ArrayUtil.grow(values, i + 1);
values[i++] = rounded;
}
return new ArrayRounding(RoundableFactory.create(values, i), this);
return new ArrayRounding(RoundableFactory.create(values, i, IS_SIMD_ROUNDING_ENABLED), this);
}
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -60,6 +60,12 @@ public class FeatureFlags {
*/
public static final String DATETIME_FORMATTER_CACHING = "opensearch.experimental.optimization.datetime_formatter_caching.enabled";

/**
* Gates the usage of SIMD for rounding down numbers.
* This is used extensively to round down timestamps in the date_histogram aggregation.
*/
public static final String SIMD_ROUNDING = "opensearch.experimental.simd.rounding.enabled";

/**
* Should store the settings from opensearch.yml.
*/
Expand Down Expand Up @@ -122,4 +128,6 @@ public static boolean isEnabled(Setting<Boolean> featureFlag) {
true,
Property.NodeScope
);

public static final Setting<Boolean> SIMD_ROUNDING_SETTING = Setting.boolSetting(SIMD_ROUNDING, false, Property.NodeScope);
}

0 comments on commit 923022b

Please sign in to comment.