-
Notifications
You must be signed in to change notification settings - Fork 1.9k
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Refactor the filter rewrite optimization (#14464)
* Refactor Split the single Helper classes and move the classes into a new package for any optimization we introduced for search path. Rename the class name to make it more straightforward and general Signed-off-by: bowenlan-amzn <[email protected]> * Refactor refactor the canOptimize logic sort out the basic rule about how to provide data from aggregator, and where to put common logic Signed-off-by: bowenlan-amzn <[email protected]> * Refactor refactor the data provider and try optimize logic Signed-off-by: bowenlan-amzn <[email protected]> * Refactor Signed-off-by: bowenlan-amzn <[email protected]> * Refactor extract segment match all logic Signed-off-by: bowenlan-amzn <[email protected]> * Refactor Signed-off-by: bowenlan-amzn <[email protected]> * Refactor inline class Signed-off-by: bowenlan-amzn <[email protected]> * Fix a bug Signed-off-by: bowenlan-amzn <[email protected]> * address comment Signed-off-by: bowenlan-amzn <[email protected]> * prepareFromSegment now doesn't return Ranges Signed-off-by: bowenlan-amzn <[email protected]> * how it looks like when introduce interfaces Signed-off-by: bowenlan-amzn <[email protected]> * remove interface, clean up Signed-off-by: bowenlan-amzn <[email protected]> * improve doc Signed-off-by: bowenlan-amzn <[email protected]> * move multirangetraversal logic to helper Signed-off-by: bowenlan-amzn <[email protected]> * improve the refactor package name -> filterrewrite move tree traversal logic to new class add documentation for important abstract methods add sub class for composite aggregation bridge Signed-off-by: bowenlan-amzn <[email protected]> * Address Marc's comments Signed-off-by: bowenlan-amzn <[email protected]> * Address concurrent segment search concern To save the ranges per segment, now change to a map that save ranges for segments separately. The increment document function "incrementBucketDocCount" should already be thread safe, as it's the same method used by normal aggregation execution path Signed-off-by: bowenlan-amzn <[email protected]> * remove circular dependency Signed-off-by: bowenlan-amzn <[email protected]> * Address comment - remove map of segment ranges, pass in by calling getRanges when needed - use AtomicInteger for the debug info Signed-off-by: bowenlan-amzn <[email protected]> --------- Signed-off-by: bowenlan-amzn <[email protected]> (cherry picked from commit 170ea27) Signed-off-by: github-actions[bot] <github-actions[bot]@users.noreply.github.com>
- Loading branch information
1 parent
6fbd079
commit 6c301f2
Showing
14 changed files
with
1,256 additions
and
1,027 deletions.
There are no files selected for viewing
849 changes: 0 additions & 849 deletions
849
server/src/main/java/org/opensearch/search/aggregations/bucket/FastFilterRewriteHelper.java
This file was deleted.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
84 changes: 84 additions & 0 deletions
84
...c/main/java/org/opensearch/search/aggregations/bucket/filterrewrite/AggregatorBridge.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,84 @@ | ||
/* | ||
* SPDX-License-Identifier: Apache-2.0 | ||
* | ||
* The OpenSearch Contributors require contributions made to | ||
* this file be licensed under the Apache-2.0 license or a | ||
* compatible open source license. | ||
*/ | ||
|
||
package org.opensearch.search.aggregations.bucket.filterrewrite; | ||
|
||
import org.apache.lucene.index.LeafReaderContext; | ||
import org.apache.lucene.index.PointValues; | ||
import org.opensearch.index.mapper.MappedFieldType; | ||
|
||
import java.io.IOException; | ||
import java.util.function.BiConsumer; | ||
import java.util.function.Consumer; | ||
|
||
/** | ||
* This interface provides a bridge between an aggregator and the optimization context, allowing | ||
* the aggregator to provide data and optimize the aggregation process. | ||
* | ||
* <p>The main purpose of this interface is to encapsulate the aggregator-specific optimization | ||
* logic and provide access to the data in Aggregator that is required for optimization, while keeping the optimization | ||
* business logic separate from the aggregator implementation. | ||
* | ||
* <p>To use this interface to optimize an aggregator, you should subclass this interface in this package | ||
* and put any specific optimization business logic in it. Then implement this subclass in the aggregator | ||
* to provide data that is needed for doing the optimization | ||
* | ||
* @opensearch.internal | ||
*/ | ||
public abstract class AggregatorBridge { | ||
|
||
/** | ||
* The field type associated with this aggregator bridge. | ||
*/ | ||
MappedFieldType fieldType; | ||
|
||
Consumer<Ranges> setRanges; | ||
|
||
void setRangesConsumer(Consumer<Ranges> setRanges) { | ||
this.setRanges = setRanges; | ||
} | ||
|
||
/** | ||
* Checks whether the aggregator can be optimized. | ||
* <p> | ||
* This method is supposed to be implemented in a specific aggregator to take in fields from there | ||
* | ||
* @return {@code true} if the aggregator can be optimized, {@code false} otherwise. | ||
* The result will be saved in the optimization context. | ||
*/ | ||
protected abstract boolean canOptimize(); | ||
|
||
/** | ||
* Prepares the optimization at shard level after checking aggregator is optimizable. | ||
* <p> | ||
* For example, figure out what are the ranges from the aggregation to do the optimization later | ||
* <p> | ||
* This method is supposed to be implemented in a specific aggregator to take in fields from there | ||
*/ | ||
protected abstract void prepare() throws IOException; | ||
|
||
/** | ||
* Prepares the optimization for a specific segment when the segment is functionally matching all docs | ||
* | ||
* @param leaf the leaf reader context for the segment | ||
*/ | ||
abstract Ranges tryBuildRangesFromSegment(LeafReaderContext leaf) throws IOException; | ||
|
||
/** | ||
* Attempts to build aggregation results for a segment | ||
* | ||
* @param values the point values (index structure for numeric values) for a segment | ||
* @param incrementDocCount a consumer to increment the document count for a range bucket. The First parameter is document count, the second is the key of the bucket | ||
* @param ranges | ||
*/ | ||
abstract FilterRewriteOptimizationContext.DebugInfo tryOptimize( | ||
PointValues values, | ||
BiConsumer<Long, Long> incrementDocCount, | ||
Ranges ranges | ||
) throws IOException; | ||
} |
36 changes: 36 additions & 0 deletions
36
...va/org/opensearch/search/aggregations/bucket/filterrewrite/CompositeAggregatorBridge.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,36 @@ | ||
/* | ||
* SPDX-License-Identifier: Apache-2.0 | ||
* | ||
* The OpenSearch Contributors require contributions made to | ||
* this file be licensed under the Apache-2.0 license or a | ||
* compatible open source license. | ||
*/ | ||
|
||
package org.opensearch.search.aggregations.bucket.filterrewrite; | ||
|
||
import org.opensearch.index.mapper.DateFieldMapper; | ||
import org.opensearch.index.mapper.MappedFieldType; | ||
import org.opensearch.search.aggregations.bucket.composite.CompositeValuesSourceConfig; | ||
import org.opensearch.search.aggregations.bucket.composite.RoundingValuesSource; | ||
|
||
/** | ||
* For composite aggregation to do optimization when it only has a single date histogram source | ||
*/ | ||
public abstract class CompositeAggregatorBridge extends DateHistogramAggregatorBridge { | ||
protected boolean canOptimize(CompositeValuesSourceConfig[] sourceConfigs) { | ||
if (sourceConfigs.length != 1 || !(sourceConfigs[0].valuesSource() instanceof RoundingValuesSource)) return false; | ||
return canOptimize(sourceConfigs[0].missingBucket(), sourceConfigs[0].hasScript(), sourceConfigs[0].fieldType()); | ||
} | ||
|
||
private boolean canOptimize(boolean missing, boolean hasScript, MappedFieldType fieldType) { | ||
if (!missing && !hasScript) { | ||
if (fieldType instanceof DateFieldMapper.DateFieldType) { | ||
if (fieldType.isSearchable()) { | ||
this.fieldType = fieldType; | ||
return true; | ||
} | ||
} | ||
} | ||
return false; | ||
} | ||
} |
Oops, something went wrong.