From d83d1e87133d151da4fbfade35815d1098803e44 Mon Sep 17 00:00:00 2001 From: Shen Yu Date: Wed, 26 Jul 2023 16:08:36 -0700 Subject: [PATCH] Non-leaf filter operators NULL support. (#11185) --- .../core/operator/docidsets/AndDocIdSet.java | 3 +- .../operator/filter/AndFilterOperator.java | 29 +++-- .../operator/filter/BaseFilterOperator.java | 40 +++++++ .../filter/BitmapBasedFilterOperator.java | 11 +- .../filter/CombinedFilterOperator.java | 7 +- .../operator/filter/EmptyFilterOperator.java | 10 +- .../filter/ExpressionFilterOperator.java | 9 +- .../operator/filter/FilterOperatorUtils.java | 22 ++-- .../H3InclusionIndexFilterOperator.java | 18 ++-- .../filter/H3IndexFilterOperator.java | 23 ++-- .../filter/InvertedIndexFilterOperator.java | 22 ++-- .../filter/JsonMatchFilterOperator.java | 9 +- .../filter/MatchAllFilterOperator.java | 9 +- .../operator/filter/NotFilterOperator.java | 24 +++-- .../operator/filter/OrFilterOperator.java | 32 ++++-- .../filter/RangeIndexBasedFilterOperator.java | 21 ++-- .../filter/ScanBasedFilterOperator.java | 14 +-- .../SortedIndexBasedFilterOperator.java | 18 ++-- .../operator/filter/TestFilterOperator.java | 101 +++++++++++------- .../filter/TextContainsFilterOperator.java | 9 +- .../filter/TextMatchFilterOperator.java | 8 +- .../operator/StarTreeFilterOperator.java | 12 ++- .../filter/AndFilterOperatorTest.java | 74 +++++++++---- .../filter/FilterOperatorUtilsTest.java | 14 ++- .../filter/NotFilterOperatorTest.java | 27 ++++- .../operator/filter/OrFilterOperatorTest.java | 57 +++++++--- .../pinot/core/operator/filter/TestUtils.java | 42 ++++++++ .../pinot/perf/BenchmarkAndDocIdIterator.java | 7 +- .../apache/pinot/perf/RawIndexBenchmark.java | 3 +- 29 files changed, 458 insertions(+), 217 deletions(-) rename pinot-core/src/{test => main}/java/org/apache/pinot/core/operator/filter/TestFilterOperator.java (50%) create mode 100644 pinot-core/src/test/java/org/apache/pinot/core/operator/filter/TestUtils.java diff --git a/pinot-core/src/main/java/org/apache/pinot/core/operator/docidsets/AndDocIdSet.java b/pinot-core/src/main/java/org/apache/pinot/core/operator/docidsets/AndDocIdSet.java index 29f267c4a13c..f5c5ed32137c 100644 --- a/pinot-core/src/main/java/org/apache/pinot/core/operator/docidsets/AndDocIdSet.java +++ b/pinot-core/src/main/java/org/apache/pinot/core/operator/docidsets/AndDocIdSet.java @@ -22,6 +22,7 @@ import java.util.Comparator; import java.util.List; import java.util.Map; +import javax.annotation.Nullable; import org.apache.commons.collections.MapUtils; import org.apache.pinot.common.utils.config.QueryOptionsUtils; import org.apache.pinot.core.common.BlockDocIdIterator; @@ -58,7 +59,7 @@ public final class AndDocIdSet implements BlockDocIdSet { private final List _docIdSets; private final boolean _cardinalityBasedRankingForScan; - public AndDocIdSet(List docIdSets, Map queryOptions) { + public AndDocIdSet(List docIdSets, @Nullable Map queryOptions) { _docIdSets = docIdSets; _cardinalityBasedRankingForScan = !MapUtils.isEmpty(queryOptions) && QueryOptionsUtils.isAndScanReorderingEnabled(queryOptions); diff --git a/pinot-core/src/main/java/org/apache/pinot/core/operator/filter/AndFilterOperator.java b/pinot-core/src/main/java/org/apache/pinot/core/operator/filter/AndFilterOperator.java index 38da261c52c5..93c79871fa13 100644 --- a/pinot-core/src/main/java/org/apache/pinot/core/operator/filter/AndFilterOperator.java +++ b/pinot-core/src/main/java/org/apache/pinot/core/operator/filter/AndFilterOperator.java @@ -21,10 +21,12 @@ import java.util.ArrayList; import java.util.List; import java.util.Map; +import javax.annotation.Nullable; import org.apache.pinot.core.common.BlockDocIdSet; import org.apache.pinot.core.common.Operator; -import org.apache.pinot.core.operator.blocks.FilterBlock; import org.apache.pinot.core.operator.docidsets.AndDocIdSet; +import org.apache.pinot.core.operator.docidsets.MatchAllDocIdSet; +import org.apache.pinot.core.operator.docidsets.OrDocIdSet; import org.apache.pinot.spi.trace.Tracing; import org.roaringbitmap.buffer.BufferFastAggregation; import org.roaringbitmap.buffer.ImmutableRoaringBitmap; @@ -36,23 +38,34 @@ public class AndFilterOperator extends BaseFilterOperator { private final List _filterOperators; private final Map _queryOptions; - public AndFilterOperator(List filterOperators, Map queryOptions) { + public AndFilterOperator(List filterOperators, @Nullable Map queryOptions, + int numDocs, boolean nullHandlingEnabled) { + super(numDocs, nullHandlingEnabled); _filterOperators = filterOperators; _queryOptions = queryOptions; } - public AndFilterOperator(List filterOperators) { - this(filterOperators, null); + @Override + protected BlockDocIdSet getTrues() { + Tracing.activeRecording().setNumChildren(_filterOperators.size()); + List blockDocIdSets = new ArrayList<>(_filterOperators.size()); + for (BaseFilterOperator filterOperator : _filterOperators) { + blockDocIdSets.add(filterOperator.getTrues()); + } + return new AndDocIdSet(blockDocIdSets, _queryOptions); } @Override - protected FilterBlock getNextBlock() { - Tracing.activeRecording().setNumChildren(_filterOperators.size()); + protected BlockDocIdSet getFalses() { List blockDocIdSets = new ArrayList<>(_filterOperators.size()); for (BaseFilterOperator filterOperator : _filterOperators) { - blockDocIdSets.add(filterOperator.nextBlock().getBlockDocIdSet()); + if (filterOperator.isResultEmpty()) { + blockDocIdSets.add(new MatchAllDocIdSet(_numDocs)); + } else { + blockDocIdSets.add(filterOperator.getFalses()); + } } - return new FilterBlock(new AndDocIdSet(blockDocIdSets, _queryOptions)); + return new OrDocIdSet(blockDocIdSets, _numDocs); } @Override diff --git a/pinot-core/src/main/java/org/apache/pinot/core/operator/filter/BaseFilterOperator.java b/pinot-core/src/main/java/org/apache/pinot/core/operator/filter/BaseFilterOperator.java index eb70c8992c2b..cbfeefce2527 100644 --- a/pinot-core/src/main/java/org/apache/pinot/core/operator/filter/BaseFilterOperator.java +++ b/pinot-core/src/main/java/org/apache/pinot/core/operator/filter/BaseFilterOperator.java @@ -18,14 +18,25 @@ */ package org.apache.pinot.core.operator.filter; +import java.util.Arrays; +import org.apache.pinot.core.common.BlockDocIdSet; import org.apache.pinot.core.operator.BaseOperator; import org.apache.pinot.core.operator.blocks.FilterBlock; +import org.apache.pinot.core.operator.docidsets.NotDocIdSet; +import org.apache.pinot.core.operator.docidsets.OrDocIdSet; /** * The {@link BaseFilterOperator} class is the base class for all filter operators. */ public abstract class BaseFilterOperator extends BaseOperator { + protected final int _numDocs; + protected final boolean _nullHandlingEnabled; + + public BaseFilterOperator(int numDocs, boolean nullHandlingEnabled) { + _numDocs = numDocs; + _nullHandlingEnabled = nullHandlingEnabled; + } /** * Returns {@code true} if the result is always empty, {@code false} otherwise. @@ -68,4 +79,33 @@ public boolean canProduceBitmaps() { public BitmapCollection getBitmaps() { throw new UnsupportedOperationException(); } + + @Override + protected FilterBlock getNextBlock() { + return new FilterBlock(getTrues()); + } + + /** + * @return document IDs in which the predicate evaluates to true. + */ + protected abstract BlockDocIdSet getTrues(); + + /** + * @return document IDs in which the predicate evaluates to NULL. + */ + protected BlockDocIdSet getNulls() { + throw new UnsupportedOperationException(); + } + + /** + * @return document IDs in which the predicate evaluates to false. + */ + protected BlockDocIdSet getFalses() { + if (_nullHandlingEnabled) { + return new NotDocIdSet(new OrDocIdSet(Arrays.asList(getTrues(), getNulls()), _numDocs), + _numDocs); + } else { + return new NotDocIdSet(getTrues(), _numDocs); + } + } } diff --git a/pinot-core/src/main/java/org/apache/pinot/core/operator/filter/BitmapBasedFilterOperator.java b/pinot-core/src/main/java/org/apache/pinot/core/operator/filter/BitmapBasedFilterOperator.java index 91cb33879bc7..56a08abf969d 100644 --- a/pinot-core/src/main/java/org/apache/pinot/core/operator/filter/BitmapBasedFilterOperator.java +++ b/pinot-core/src/main/java/org/apache/pinot/core/operator/filter/BitmapBasedFilterOperator.java @@ -20,8 +20,8 @@ import java.util.Collections; import java.util.List; +import org.apache.pinot.core.common.BlockDocIdSet; import org.apache.pinot.core.common.Operator; -import org.apache.pinot.core.operator.blocks.FilterBlock; import org.apache.pinot.core.operator.docidsets.BitmapDocIdSet; import org.roaringbitmap.buffer.ImmutableRoaringBitmap; @@ -31,20 +31,19 @@ public class BitmapBasedFilterOperator extends BaseFilterOperator { private final ImmutableRoaringBitmap _docIds; private final boolean _exclusive; - private final int _numDocs; public BitmapBasedFilterOperator(ImmutableRoaringBitmap docIds, boolean exclusive, int numDocs) { + super(numDocs, false); _docIds = docIds; _exclusive = exclusive; - _numDocs = numDocs; } @Override - protected FilterBlock getNextBlock() { + protected BlockDocIdSet getTrues() { if (_exclusive) { - return new FilterBlock(new BitmapDocIdSet(ImmutableRoaringBitmap.flip(_docIds, 0L, _numDocs), _numDocs)); + return new BitmapDocIdSet(ImmutableRoaringBitmap.flip(_docIds, 0L, _numDocs), _numDocs); } else { - return new FilterBlock(new BitmapDocIdSet(_docIds, _numDocs)); + return new BitmapDocIdSet(_docIds, _numDocs); } } diff --git a/pinot-core/src/main/java/org/apache/pinot/core/operator/filter/CombinedFilterOperator.java b/pinot-core/src/main/java/org/apache/pinot/core/operator/filter/CombinedFilterOperator.java index 4c4f2a689dee..b9aeb1c99d14 100644 --- a/pinot-core/src/main/java/org/apache/pinot/core/operator/filter/CombinedFilterOperator.java +++ b/pinot-core/src/main/java/org/apache/pinot/core/operator/filter/CombinedFilterOperator.java @@ -22,7 +22,6 @@ import java.util.List; import java.util.Map; import org.apache.pinot.core.common.BlockDocIdSet; -import org.apache.pinot.core.operator.blocks.FilterBlock; import org.apache.pinot.core.operator.docidsets.AndDocIdSet; import org.apache.pinot.spi.trace.Tracing; @@ -40,6 +39,8 @@ public class CombinedFilterOperator extends BaseFilterOperator { public CombinedFilterOperator(BaseFilterOperator mainFilterOperator, BaseFilterOperator subFilterOperator, Map queryOptions) { + // This filter operator does not support AND/OR/NOT operations. + super(0, false); assert !mainFilterOperator.isResultEmpty() && !mainFilterOperator.isResultMatchingAll() && !subFilterOperator.isResultEmpty() && !subFilterOperator.isResultMatchingAll(); _mainFilterOperator = mainFilterOperator; @@ -58,10 +59,10 @@ public String toExplainString() { } @Override - protected FilterBlock getNextBlock() { + protected BlockDocIdSet getTrues() { Tracing.activeRecording().setNumChildren(2); BlockDocIdSet mainFilterDocIdSet = _mainFilterOperator.nextBlock().getNonScanFilterBLockDocIdSet(); BlockDocIdSet subFilterDocIdSet = _subFilterOperator.nextBlock().getBlockDocIdSet(); - return new FilterBlock(new AndDocIdSet(Arrays.asList(mainFilterDocIdSet, subFilterDocIdSet), _queryOptions)); + return new AndDocIdSet(Arrays.asList(mainFilterDocIdSet, subFilterDocIdSet), _queryOptions); } } diff --git a/pinot-core/src/main/java/org/apache/pinot/core/operator/filter/EmptyFilterOperator.java b/pinot-core/src/main/java/org/apache/pinot/core/operator/filter/EmptyFilterOperator.java index 66fe22b110c1..55144918ca59 100644 --- a/pinot-core/src/main/java/org/apache/pinot/core/operator/filter/EmptyFilterOperator.java +++ b/pinot-core/src/main/java/org/apache/pinot/core/operator/filter/EmptyFilterOperator.java @@ -20,10 +20,10 @@ import java.util.Collections; import java.util.List; +import org.apache.pinot.core.common.BlockDocIdSet; import org.apache.pinot.core.common.ExplainPlanRows; import org.apache.pinot.core.common.Operator; -import org.apache.pinot.core.operator.blocks.EmptyFilterBlock; -import org.apache.pinot.core.operator.blocks.FilterBlock; +import org.apache.pinot.core.operator.docidsets.EmptyDocIdSet; /** @@ -31,6 +31,8 @@ */ public final class EmptyFilterOperator extends BaseFilterOperator { private EmptyFilterOperator() { + // We will never call its getFalses() method. + super(0, false); } @@ -58,8 +60,8 @@ public int getNumMatchingDocs() { } @Override - protected FilterBlock getNextBlock() { - return EmptyFilterBlock.getInstance(); + protected BlockDocIdSet getTrues() { + return EmptyDocIdSet.getInstance(); } diff --git a/pinot-core/src/main/java/org/apache/pinot/core/operator/filter/ExpressionFilterOperator.java b/pinot-core/src/main/java/org/apache/pinot/core/operator/filter/ExpressionFilterOperator.java index 9001e48fe24a..6aeee82db601 100644 --- a/pinot-core/src/main/java/org/apache/pinot/core/operator/filter/ExpressionFilterOperator.java +++ b/pinot-core/src/main/java/org/apache/pinot/core/operator/filter/ExpressionFilterOperator.java @@ -27,9 +27,9 @@ import org.apache.pinot.common.request.context.ExpressionContext; import org.apache.pinot.common.request.context.predicate.Predicate; import org.apache.pinot.common.utils.HashUtil; +import org.apache.pinot.core.common.BlockDocIdSet; import org.apache.pinot.core.common.Operator; import org.apache.pinot.core.operator.ColumnContext; -import org.apache.pinot.core.operator.blocks.FilterBlock; import org.apache.pinot.core.operator.docidsets.ExpressionDocIdSet; import org.apache.pinot.core.operator.filter.predicate.PredicateEvaluator; import org.apache.pinot.core.operator.filter.predicate.PredicateEvaluatorProvider; @@ -43,13 +43,12 @@ public class ExpressionFilterOperator extends BaseFilterOperator { private static final String EXPLAIN_NAME = "FILTER_EXPRESSION"; - private final int _numDocs; private final Map _dataSourceMap; private final TransformFunction _transformFunction; private final PredicateEvaluator _predicateEvaluator; public ExpressionFilterOperator(IndexSegment segment, QueryContext queryContext, Predicate predicate, int numDocs) { - _numDocs = numDocs; + super(numDocs, queryContext.isNullHandlingEnabled()); Set columns = new HashSet<>(); ExpressionContext lhs = predicate.getLhs(); @@ -69,8 +68,8 @@ public ExpressionFilterOperator(IndexSegment segment, QueryContext queryContext, } @Override - protected FilterBlock getNextBlock() { - return new FilterBlock(new ExpressionDocIdSet(_transformFunction, _predicateEvaluator, _dataSourceMap, _numDocs)); + protected BlockDocIdSet getTrues() { + return new ExpressionDocIdSet(_transformFunction, _predicateEvaluator, _dataSourceMap, _numDocs); } @Override diff --git a/pinot-core/src/main/java/org/apache/pinot/core/operator/filter/FilterOperatorUtils.java b/pinot-core/src/main/java/org/apache/pinot/core/operator/filter/FilterOperatorUtils.java index 7e00400eeec4..88a3c18d7961 100644 --- a/pinot-core/src/main/java/org/apache/pinot/core/operator/filter/FilterOperatorUtils.java +++ b/pinot-core/src/main/java/org/apache/pinot/core/operator/filter/FilterOperatorUtils.java @@ -85,29 +85,29 @@ public BaseFilterOperator getLeafFilterOperator(PredicateEvaluator predicateEval Predicate.Type predicateType = predicateEvaluator.getPredicateType(); if (predicateType == Predicate.Type.RANGE) { if (dataSource.getDataSourceMetadata().isSorted() && dataSource.getDictionary() != null) { - return new SortedIndexBasedFilterOperator(predicateEvaluator, dataSource, numDocs); + return new SortedIndexBasedFilterOperator(predicateEvaluator, dataSource, numDocs, nullHandlingEnabled); } if (RangeIndexBasedFilterOperator.canEvaluate(predicateEvaluator, dataSource)) { - return new RangeIndexBasedFilterOperator(predicateEvaluator, dataSource, numDocs); + return new RangeIndexBasedFilterOperator(predicateEvaluator, dataSource, numDocs, nullHandlingEnabled); } return new ScanBasedFilterOperator(predicateEvaluator, dataSource, numDocs, nullHandlingEnabled); } else if (predicateType == Predicate.Type.REGEXP_LIKE) { if (dataSource.getFSTIndex() != null && dataSource.getDataSourceMetadata().isSorted()) { - return new SortedIndexBasedFilterOperator(predicateEvaluator, dataSource, numDocs); + return new SortedIndexBasedFilterOperator(predicateEvaluator, dataSource, numDocs, nullHandlingEnabled); } if (dataSource.getFSTIndex() != null && dataSource.getInvertedIndex() != null) { - return new InvertedIndexFilterOperator(predicateEvaluator, dataSource, numDocs); + return new InvertedIndexFilterOperator(predicateEvaluator, dataSource, numDocs, nullHandlingEnabled); } return new ScanBasedFilterOperator(predicateEvaluator, dataSource, numDocs, nullHandlingEnabled); } else { if (dataSource.getDataSourceMetadata().isSorted() && dataSource.getDictionary() != null) { - return new SortedIndexBasedFilterOperator(predicateEvaluator, dataSource, numDocs); + return new SortedIndexBasedFilterOperator(predicateEvaluator, dataSource, numDocs, nullHandlingEnabled); } if (dataSource.getInvertedIndex() != null) { - return new InvertedIndexFilterOperator(predicateEvaluator, dataSource, numDocs); + return new InvertedIndexFilterOperator(predicateEvaluator, dataSource, numDocs, nullHandlingEnabled); } if (RangeIndexBasedFilterOperator.canEvaluate(predicateEvaluator, dataSource)) { - return new RangeIndexBasedFilterOperator(predicateEvaluator, dataSource, numDocs); + return new RangeIndexBasedFilterOperator(predicateEvaluator, dataSource, numDocs, nullHandlingEnabled); } return new ScanBasedFilterOperator(predicateEvaluator, dataSource, numDocs, nullHandlingEnabled); } @@ -134,7 +134,8 @@ public BaseFilterOperator getAndFilterOperator(QueryContext queryContext, List arguments = predicate.getLhs().getFunction().getArguments(); EqPredicate eqPredicate = (EqPredicate) predicate; @@ -81,7 +80,7 @@ public H3InclusionIndexFilterOperator(IndexSegment segment, QueryContext queryCo } @Override - protected FilterBlock getNextBlock() { + protected BlockDocIdSet getTrues() { // get the set of H3 cells at the specified resolution which completely cover the input shape and potential cover. Pair fullCoverAndPotentialCoverCells = _queryContext .getOrComputeSharedValue(Pair.class, LITERAL_H3_CELLS_CACHE_NAME, @@ -121,21 +120,20 @@ protected FilterBlock getNextBlock() { } /** - * Returns the filter block based on the given the partial match doc ids. + * Returns the filter block document IDs based on the given the partial match doc ids. */ - private FilterBlock getFilterBlock(MutableRoaringBitmap fullMatchDocIds, MutableRoaringBitmap partialMatchDocIds) { + private BlockDocIdSet getFilterBlock(MutableRoaringBitmap fullMatchDocIds, MutableRoaringBitmap partialMatchDocIds) { ExpressionFilterOperator expressionFilterOperator = new ExpressionFilterOperator(_segment, _queryContext, _predicate, _numDocs); - ScanBasedDocIdIterator docIdIterator = - (ScanBasedDocIdIterator) expressionFilterOperator.getNextBlock().getBlockDocIdSet().iterator(); + ScanBasedDocIdIterator docIdIterator = (ScanBasedDocIdIterator) expressionFilterOperator.getTrues().iterator(); MutableRoaringBitmap result = docIdIterator.applyAnd(partialMatchDocIds); result.or(fullMatchDocIds); - return new FilterBlock(new BitmapDocIdSet(result, _numDocs) { + return new BitmapDocIdSet(result, _numDocs) { @Override public long getNumEntriesScannedInFilter() { return docIdIterator.getNumEntriesScanned(); } - }); + }; } @Override diff --git a/pinot-core/src/main/java/org/apache/pinot/core/operator/filter/H3IndexFilterOperator.java b/pinot-core/src/main/java/org/apache/pinot/core/operator/filter/H3IndexFilterOperator.java index 96109ba5499d..10518a911e13 100644 --- a/pinot-core/src/main/java/org/apache/pinot/core/operator/filter/H3IndexFilterOperator.java +++ b/pinot-core/src/main/java/org/apache/pinot/core/operator/filter/H3IndexFilterOperator.java @@ -27,8 +27,8 @@ import org.apache.pinot.common.request.context.ExpressionContext; import org.apache.pinot.common.request.context.predicate.Predicate; import org.apache.pinot.common.request.context.predicate.RangePredicate; +import org.apache.pinot.core.common.BlockDocIdSet; import org.apache.pinot.core.common.Operator; -import org.apache.pinot.core.operator.blocks.FilterBlock; import org.apache.pinot.core.operator.dociditerators.ScanBasedDocIdIterator; import org.apache.pinot.core.operator.docidsets.BitmapDocIdSet; import org.apache.pinot.core.operator.docidsets.EmptyDocIdSet; @@ -52,7 +52,6 @@ public class H3IndexFilterOperator extends BaseFilterOperator { private final IndexSegment _segment; private final QueryContext _queryContext; private final Predicate _predicate; - private final int _numDocs; private final H3IndexReader _h3IndexReader; private final long _h3Id; private final double _edgeLength; @@ -60,10 +59,10 @@ public class H3IndexFilterOperator extends BaseFilterOperator { private final double _upperBound; public H3IndexFilterOperator(IndexSegment segment, QueryContext queryContext, Predicate predicate, int numDocs) { + super(numDocs, false); _segment = segment; _queryContext = queryContext; _predicate = predicate; - _numDocs = numDocs; // TODO: handle nested geography/geometry conversion functions List arguments = predicate.getLhs().getFunction().getArguments(); @@ -96,10 +95,10 @@ public H3IndexFilterOperator(IndexSegment segment, QueryContext queryContext, Pr } @Override - protected FilterBlock getNextBlock() { + protected BlockDocIdSet getTrues() { if (_upperBound < 0 || _lowerBound > _upperBound) { // Invalid upper bound, return an empty block - return new FilterBlock(EmptyDocIdSet.getInstance()); + return EmptyDocIdSet.getInstance(); } try { @@ -108,7 +107,7 @@ protected FilterBlock getNextBlock() { if (Double.isNaN(_upperBound)) { // No bound, return a match-all block - return new FilterBlock(new MatchAllDocIdSet(_numDocs)); + return new MatchAllDocIdSet(_numDocs); } // Upper bound only @@ -185,7 +184,7 @@ protected FilterBlock getNextBlock() { return getFilterBlock(fullMatchDocIds, partialMatchDocIds); } catch (Exception e) { // Fall back to ExpressionFilterOperator when the execution encounters exception (e.g. numRings is too large) - return new ExpressionFilterOperator(_segment, _queryContext, _predicate, _numDocs).getNextBlock(); + return new ExpressionFilterOperator(_segment, _queryContext, _predicate, _numDocs).getTrues(); } } @@ -229,21 +228,21 @@ private List getH3Ids(int numRings) { } /** - * Returns the filter block based on the given full match doc ids and the partial match doc ids. + * Returns the filter block document IDs based on the given full match doc ids and the partial match doc ids. */ - private FilterBlock getFilterBlock(MutableRoaringBitmap fullMatchDocIds, MutableRoaringBitmap partialMatchDocIds) { + private BlockDocIdSet getFilterBlock(MutableRoaringBitmap fullMatchDocIds, MutableRoaringBitmap partialMatchDocIds) { ExpressionFilterOperator expressionFilterOperator = new ExpressionFilterOperator(_segment, _queryContext, _predicate, _numDocs); ScanBasedDocIdIterator docIdIterator = - (ScanBasedDocIdIterator) expressionFilterOperator.getNextBlock().getBlockDocIdSet().iterator(); + (ScanBasedDocIdIterator) expressionFilterOperator.getTrues().iterator(); MutableRoaringBitmap result = docIdIterator.applyAnd(partialMatchDocIds); result.or(fullMatchDocIds); - return new FilterBlock(new BitmapDocIdSet(result, _numDocs) { + return new BitmapDocIdSet(result, _numDocs) { @Override public long getNumEntriesScannedInFilter() { return docIdIterator.getNumEntriesScanned(); } - }); + }; } diff --git a/pinot-core/src/main/java/org/apache/pinot/core/operator/filter/InvertedIndexFilterOperator.java b/pinot-core/src/main/java/org/apache/pinot/core/operator/filter/InvertedIndexFilterOperator.java index 8f617417a3a7..fd43711b15dd 100644 --- a/pinot-core/src/main/java/org/apache/pinot/core/operator/filter/InvertedIndexFilterOperator.java +++ b/pinot-core/src/main/java/org/apache/pinot/core/operator/filter/InvertedIndexFilterOperator.java @@ -21,10 +21,10 @@ import java.util.Collections; import java.util.List; import org.apache.pinot.common.request.context.predicate.Predicate; +import org.apache.pinot.core.common.BlockDocIdSet; import org.apache.pinot.core.common.Operator; -import org.apache.pinot.core.operator.blocks.EmptyFilterBlock; -import org.apache.pinot.core.operator.blocks.FilterBlock; import org.apache.pinot.core.operator.docidsets.BitmapDocIdSet; +import org.apache.pinot.core.operator.docidsets.EmptyDocIdSet; import org.apache.pinot.core.operator.filter.predicate.PredicateEvaluator; import org.apache.pinot.segment.spi.datasource.DataSource; import org.apache.pinot.segment.spi.index.reader.InvertedIndexReader; @@ -41,24 +41,24 @@ public class InvertedIndexFilterOperator extends BaseFilterOperator { private final PredicateEvaluator _predicateEvaluator; private final InvertedIndexReader _invertedIndexReader; private final boolean _exclusive; - private final int _numDocs; - InvertedIndexFilterOperator(PredicateEvaluator predicateEvaluator, DataSource dataSource, int numDocs) { + InvertedIndexFilterOperator(PredicateEvaluator predicateEvaluator, DataSource dataSource, int numDocs, + boolean nullHandlingEnabled) { + super(numDocs, nullHandlingEnabled); _predicateEvaluator = predicateEvaluator; @SuppressWarnings("unchecked") InvertedIndexReader invertedIndexReader = (InvertedIndexReader) dataSource.getInvertedIndex(); _invertedIndexReader = invertedIndexReader; _exclusive = predicateEvaluator.isExclusive(); - _numDocs = numDocs; } @Override - protected FilterBlock getNextBlock() { + protected BlockDocIdSet getTrues() { int[] dictIds = _exclusive ? _predicateEvaluator.getNonMatchingDictIds() : _predicateEvaluator.getMatchingDictIds(); int numDictIds = dictIds.length; if (numDictIds == 0) { - return EmptyFilterBlock.getInstance(); + return EmptyDocIdSet.getInstance(); } if (numDictIds == 1) { ImmutableRoaringBitmap docIds = _invertedIndexReader.getDocIds(dictIds[0]); @@ -66,12 +66,12 @@ protected FilterBlock getNextBlock() { if (docIds instanceof MutableRoaringBitmap) { MutableRoaringBitmap mutableRoaringBitmap = (MutableRoaringBitmap) docIds; mutableRoaringBitmap.flip(0L, _numDocs); - return new FilterBlock(new BitmapDocIdSet(mutableRoaringBitmap, _numDocs)); + return new BitmapDocIdSet(mutableRoaringBitmap, _numDocs); } else { - return new FilterBlock(new BitmapDocIdSet(ImmutableRoaringBitmap.flip(docIds, 0L, _numDocs), _numDocs)); + return new BitmapDocIdSet(ImmutableRoaringBitmap.flip(docIds, 0L, _numDocs), _numDocs); } } else { - return new FilterBlock(new BitmapDocIdSet(docIds, _numDocs)); + return new BitmapDocIdSet(docIds, _numDocs); } } else { ImmutableRoaringBitmap[] bitmaps = new ImmutableRoaringBitmap[numDictIds]; @@ -88,7 +88,7 @@ protected FilterBlock getNextBlock() { recording.setNumDocsMatchingAfterFilter(docIds.getCardinality()); recording.setFilter(FilterType.INDEX, String.valueOf(_predicateEvaluator.getPredicateType())); } - return new FilterBlock(new BitmapDocIdSet(docIds, _numDocs)); + return new BitmapDocIdSet(docIds, _numDocs); } } diff --git a/pinot-core/src/main/java/org/apache/pinot/core/operator/filter/JsonMatchFilterOperator.java b/pinot-core/src/main/java/org/apache/pinot/core/operator/filter/JsonMatchFilterOperator.java index 75ebaa246c66..ccf93983cfdb 100644 --- a/pinot-core/src/main/java/org/apache/pinot/core/operator/filter/JsonMatchFilterOperator.java +++ b/pinot-core/src/main/java/org/apache/pinot/core/operator/filter/JsonMatchFilterOperator.java @@ -21,8 +21,8 @@ import java.util.Collections; import java.util.List; import org.apache.pinot.common.request.context.predicate.JsonMatchPredicate; +import org.apache.pinot.core.common.BlockDocIdSet; import org.apache.pinot.core.common.Operator; -import org.apache.pinot.core.operator.blocks.FilterBlock; import org.apache.pinot.core.operator.docidsets.BitmapDocIdSet; import org.apache.pinot.segment.spi.index.reader.JsonIndexReader; import org.apache.pinot.spi.trace.FilterType; @@ -38,21 +38,20 @@ public class JsonMatchFilterOperator extends BaseFilterOperator { private static final String EXPLAIN_NAME = "FILTER_JSON_INDEX"; private final JsonIndexReader _jsonIndex; - private final int _numDocs; private final JsonMatchPredicate _predicate; public JsonMatchFilterOperator(JsonIndexReader jsonIndex, JsonMatchPredicate predicate, int numDocs) { + super(numDocs, false); _jsonIndex = jsonIndex; _predicate = predicate; - _numDocs = numDocs; } @Override - protected FilterBlock getNextBlock() { + protected BlockDocIdSet getTrues() { ImmutableRoaringBitmap bitmap = _jsonIndex.getMatchingDocIds(_predicate.getValue()); record(bitmap); - return new FilterBlock(new BitmapDocIdSet(bitmap, _numDocs)); + return new BitmapDocIdSet(bitmap, _numDocs); } @Override diff --git a/pinot-core/src/main/java/org/apache/pinot/core/operator/filter/MatchAllFilterOperator.java b/pinot-core/src/main/java/org/apache/pinot/core/operator/filter/MatchAllFilterOperator.java index e4f5d9f6622a..44ad60e7b4df 100644 --- a/pinot-core/src/main/java/org/apache/pinot/core/operator/filter/MatchAllFilterOperator.java +++ b/pinot-core/src/main/java/org/apache/pinot/core/operator/filter/MatchAllFilterOperator.java @@ -20,18 +20,17 @@ import java.util.Collections; import java.util.List; +import org.apache.pinot.core.common.BlockDocIdSet; import org.apache.pinot.core.common.ExplainPlanRows; import org.apache.pinot.core.common.Operator; -import org.apache.pinot.core.operator.blocks.FilterBlock; import org.apache.pinot.core.operator.docidsets.MatchAllDocIdSet; public class MatchAllFilterOperator extends BaseFilterOperator { public static final String EXPLAIN_NAME = "FILTER_MATCH_ENTIRE_SEGMENT"; - private final int _numDocs; public MatchAllFilterOperator(int numDocs) { - _numDocs = numDocs; + super(numDocs, false); } @Override @@ -40,8 +39,8 @@ public final boolean isResultMatchingAll() { } @Override - protected FilterBlock getNextBlock() { - return new FilterBlock(new MatchAllDocIdSet(_numDocs)); + protected BlockDocIdSet getTrues() { + return new MatchAllDocIdSet(_numDocs); } diff --git a/pinot-core/src/main/java/org/apache/pinot/core/operator/filter/NotFilterOperator.java b/pinot-core/src/main/java/org/apache/pinot/core/operator/filter/NotFilterOperator.java index ac3e2bb00b22..f473f7a0ec1e 100644 --- a/pinot-core/src/main/java/org/apache/pinot/core/operator/filter/NotFilterOperator.java +++ b/pinot-core/src/main/java/org/apache/pinot/core/operator/filter/NotFilterOperator.java @@ -18,26 +18,25 @@ */ package org.apache.pinot.core.operator.filter; + import java.util.Collections; import java.util.List; import javax.annotation.Nullable; +import org.apache.pinot.core.common.BlockDocIdSet; import org.apache.pinot.core.common.Operator; -import org.apache.pinot.core.operator.blocks.FilterBlock; -import org.apache.pinot.core.operator.docidsets.NotDocIdSet; +import org.apache.pinot.core.operator.docidsets.MatchAllDocIdSet; public class NotFilterOperator extends BaseFilterOperator { private static final String EXPLAIN_NAME = "FILTER_NOT"; private final BaseFilterOperator _filterOperator; - private final int _numDocs; - public NotFilterOperator(BaseFilterOperator filterOperator, int numDocs) { + public NotFilterOperator(BaseFilterOperator filterOperator, int numDocs, boolean nullHandlingEnabled) { + super(numDocs, nullHandlingEnabled); _filterOperator = filterOperator; - _numDocs = numDocs; } - @Override public List getChildOperators() { return Collections.singletonList(_filterOperator); @@ -50,8 +49,17 @@ public String toExplainString() { } @Override - protected FilterBlock getNextBlock() { - return new FilterBlock(new NotDocIdSet(_filterOperator.nextBlock().getBlockDocIdSet(), _numDocs)); + protected BlockDocIdSet getTrues() { + if (_filterOperator.isResultEmpty()) { + return new MatchAllDocIdSet(_numDocs); + } else { + return _filterOperator.getFalses(); + } + } + + @Override + protected BlockDocIdSet getFalses() { + return _filterOperator.getTrues(); } @Override diff --git a/pinot-core/src/main/java/org/apache/pinot/core/operator/filter/OrFilterOperator.java b/pinot-core/src/main/java/org/apache/pinot/core/operator/filter/OrFilterOperator.java index 114703ec062e..5b99d583531a 100644 --- a/pinot-core/src/main/java/org/apache/pinot/core/operator/filter/OrFilterOperator.java +++ b/pinot-core/src/main/java/org/apache/pinot/core/operator/filter/OrFilterOperator.java @@ -20,9 +20,12 @@ import java.util.ArrayList; import java.util.List; +import java.util.Map; +import javax.annotation.Nullable; import org.apache.pinot.core.common.BlockDocIdSet; import org.apache.pinot.core.common.Operator; -import org.apache.pinot.core.operator.blocks.FilterBlock; +import org.apache.pinot.core.operator.docidsets.AndDocIdSet; +import org.apache.pinot.core.operator.docidsets.MatchAllDocIdSet; import org.apache.pinot.core.operator.docidsets.OrDocIdSet; import org.apache.pinot.spi.trace.Tracing; import org.roaringbitmap.buffer.BufferFastAggregation; @@ -33,21 +36,36 @@ public class OrFilterOperator extends BaseFilterOperator { private static final String EXPLAIN_NAME = "FILTER_OR"; private final List _filterOperators; - private final int _numDocs; + private final Map _queryOptions; - public OrFilterOperator(List filterOperators, int numDocs) { + public OrFilterOperator(List filterOperators, @Nullable Map queryOptions, + int numDocs, boolean nullHandlingEnabled) { + super(numDocs, nullHandlingEnabled); _filterOperators = filterOperators; - _numDocs = numDocs; + _queryOptions = queryOptions; } @Override - protected FilterBlock getNextBlock() { + protected BlockDocIdSet getTrues() { Tracing.activeRecording().setNumChildren(_filterOperators.size()); List blockDocIdSets = new ArrayList<>(_filterOperators.size()); for (BaseFilterOperator filterOperator : _filterOperators) { - blockDocIdSets.add(filterOperator.nextBlock().getBlockDocIdSet()); + blockDocIdSets.add(filterOperator.getTrues()); } - return new FilterBlock(new OrDocIdSet(blockDocIdSets, _numDocs)); + return new OrDocIdSet(blockDocIdSets, _numDocs); + } + + @Override + protected BlockDocIdSet getFalses() { + List blockDocIdSets = new ArrayList<>(_filterOperators.size()); + for (BaseFilterOperator filterOperator : _filterOperators) { + if (filterOperator.isResultEmpty()) { + blockDocIdSets.add(new MatchAllDocIdSet(_numDocs)); + } else { + blockDocIdSets.add(filterOperator.getFalses()); + } + } + return new AndDocIdSet(blockDocIdSets, _queryOptions); } @Override diff --git a/pinot-core/src/main/java/org/apache/pinot/core/operator/filter/RangeIndexBasedFilterOperator.java b/pinot-core/src/main/java/org/apache/pinot/core/operator/filter/RangeIndexBasedFilterOperator.java index 244718998c2f..e1c3a1140730 100644 --- a/pinot-core/src/main/java/org/apache/pinot/core/operator/filter/RangeIndexBasedFilterOperator.java +++ b/pinot-core/src/main/java/org/apache/pinot/core/operator/filter/RangeIndexBasedFilterOperator.java @@ -23,7 +23,6 @@ import org.apache.pinot.common.request.context.predicate.Predicate; import org.apache.pinot.core.common.BlockDocIdSet; import org.apache.pinot.core.common.Operator; -import org.apache.pinot.core.operator.blocks.FilterBlock; import org.apache.pinot.core.operator.dociditerators.ScanBasedDocIdIterator; import org.apache.pinot.core.operator.docidsets.BitmapDocIdSet; import org.apache.pinot.core.operator.filter.predicate.PredicateEvaluator; @@ -53,7 +52,6 @@ public class RangeIndexBasedFilterOperator extends BaseFilterOperator { private final PredicateEvaluator _predicateEvaluator; private final DataSource _dataSource; private final FieldSpec.DataType _parameterType; - private final int _numDocs; static boolean canEvaluate(PredicateEvaluator predicateEvaluator, DataSource dataSource) { Predicate.Type type = predicateEvaluator.getPredicateType(); @@ -63,25 +61,26 @@ static boolean canEvaluate(PredicateEvaluator predicateEvaluator, DataSource dat } @SuppressWarnings("unchecked") - public RangeIndexBasedFilterOperator(PredicateEvaluator predicateEvaluator, DataSource dataSource, int numDocs) { + public RangeIndexBasedFilterOperator(PredicateEvaluator predicateEvaluator, DataSource dataSource, int numDocs, + boolean nullHandlingEnabled) { + super(numDocs, nullHandlingEnabled); _predicateEvaluator = predicateEvaluator; _rangeIndexReader = (RangeIndexReader) dataSource.getRangeIndex(); _dataSource = dataSource; - _numDocs = numDocs; _parameterType = predicateEvaluator.isDictionaryBased() ? FieldSpec.DataType.INT : predicateEvaluator.getDataType(); } @Override - protected FilterBlock getNextBlock() { + protected BlockDocIdSet getTrues() { if (_rangeIndexReader.isExact()) { ImmutableRoaringBitmap matches = getMatchingDocIds(); recordFilter(matches); - return new FilterBlock(new BitmapDocIdSet(matches, _numDocs)); + return new BitmapDocIdSet(matches, _numDocs); } return evaluateLegacyRangeFilter(); } - private FilterBlock evaluateLegacyRangeFilter() { + private BlockDocIdSet evaluateLegacyRangeFilter() { ImmutableRoaringBitmap matches = getMatchingDocIds(); // if the implementation cannot match the entire query exactly, it will // yield partial matches, which need to be verified by scanning. If it @@ -89,25 +88,25 @@ private FilterBlock evaluateLegacyRangeFilter() { ImmutableRoaringBitmap partialMatches = getPartiallyMatchingDocIds(); // this branch is likely until RangeIndexReader reimplemented and enabled by default if (partialMatches == null) { - return new FilterBlock(new BitmapDocIdSet(matches == null ? new MutableRoaringBitmap() : matches, _numDocs)); + return new BitmapDocIdSet(matches == null ? new MutableRoaringBitmap() : matches, _numDocs); } // TODO: support proper null handling in range index. // Need to scan the first and last range as they might be partially matched ScanBasedFilterOperator scanBasedFilterOperator = new ScanBasedFilterOperator(_predicateEvaluator, _dataSource, _numDocs, false); - BlockDocIdSet scanBasedDocIdSet = scanBasedFilterOperator.getNextBlock().getBlockDocIdSet(); + BlockDocIdSet scanBasedDocIdSet = scanBasedFilterOperator.getTrues(); MutableRoaringBitmap docIds = ((ScanBasedDocIdIterator) scanBasedDocIdSet.iterator()).applyAnd(partialMatches); if (matches != null) { docIds.or(matches); } recordFilter(matches); - return new FilterBlock(new BitmapDocIdSet(docIds, _numDocs) { + return new BitmapDocIdSet(docIds, _numDocs) { // Override this method to reflect the entries scanned @Override public long getNumEntriesScannedInFilter() { return scanBasedDocIdSet.getNumEntriesScannedInFilter(); } - }); + }; } ImmutableRoaringBitmap getMatchingDocIds() { diff --git a/pinot-core/src/main/java/org/apache/pinot/core/operator/filter/ScanBasedFilterOperator.java b/pinot-core/src/main/java/org/apache/pinot/core/operator/filter/ScanBasedFilterOperator.java index b2241cfc6f29..a40d9b91eb0b 100644 --- a/pinot-core/src/main/java/org/apache/pinot/core/operator/filter/ScanBasedFilterOperator.java +++ b/pinot-core/src/main/java/org/apache/pinot/core/operator/filter/ScanBasedFilterOperator.java @@ -22,8 +22,8 @@ import java.util.Collections; import java.util.List; import org.apache.pinot.core.common.BlockDocIdIterator; +import org.apache.pinot.core.common.BlockDocIdSet; import org.apache.pinot.core.common.Operator; -import org.apache.pinot.core.operator.blocks.FilterBlock; import org.apache.pinot.core.operator.docidsets.MVScanDocIdSet; import org.apache.pinot.core.operator.docidsets.SVScanDocIdSet; import org.apache.pinot.core.operator.filter.predicate.PredicateEvaluator; @@ -36,8 +36,6 @@ public class ScanBasedFilterOperator extends BaseFilterOperator { private final PredicateEvaluator _predicateEvaluator; private final DataSource _dataSource; - private final int _numDocs; - private final boolean _nullHandlingEnabled; private final int _batchSize; public ScanBasedFilterOperator(PredicateEvaluator predicateEvaluator, DataSource dataSource, int numDocs, @@ -47,10 +45,9 @@ public ScanBasedFilterOperator(PredicateEvaluator predicateEvaluator, DataSource public ScanBasedFilterOperator(PredicateEvaluator predicateEvaluator, DataSource dataSource, int numDocs, boolean nullHandlingEnabled, int batchSize) { + super(numDocs, nullHandlingEnabled); _predicateEvaluator = predicateEvaluator; _dataSource = dataSource; - _numDocs = numDocs; - _nullHandlingEnabled = nullHandlingEnabled; Preconditions.checkState(_dataSource.getForwardIndex() != null, "Forward index disabled for column: %s, scan based filtering not supported!", _dataSource.getDataSourceMetadata().getFieldSpec().getName()); @@ -58,13 +55,12 @@ public ScanBasedFilterOperator(PredicateEvaluator predicateEvaluator, DataSource } @Override - protected FilterBlock getNextBlock() { + protected BlockDocIdSet getTrues() { DataSourceMetadata dataSourceMetadata = _dataSource.getDataSourceMetadata(); if (dataSourceMetadata.isSingleValue()) { - return new FilterBlock(new SVScanDocIdSet(_predicateEvaluator, _dataSource, _numDocs, _nullHandlingEnabled, - _batchSize)); + return new SVScanDocIdSet(_predicateEvaluator, _dataSource, _numDocs, _nullHandlingEnabled, _batchSize); } else { - return new FilterBlock(new MVScanDocIdSet(_predicateEvaluator, _dataSource, _numDocs)); + return new MVScanDocIdSet(_predicateEvaluator, _dataSource, _numDocs); } } diff --git a/pinot-core/src/main/java/org/apache/pinot/core/operator/filter/SortedIndexBasedFilterOperator.java b/pinot-core/src/main/java/org/apache/pinot/core/operator/filter/SortedIndexBasedFilterOperator.java index cef4ef2ba011..a3e58fcf4b2f 100644 --- a/pinot-core/src/main/java/org/apache/pinot/core/operator/filter/SortedIndexBasedFilterOperator.java +++ b/pinot-core/src/main/java/org/apache/pinot/core/operator/filter/SortedIndexBasedFilterOperator.java @@ -23,8 +23,8 @@ import java.util.Arrays; import java.util.Collections; import java.util.List; +import org.apache.pinot.core.common.BlockDocIdSet; import org.apache.pinot.core.common.Operator; -import org.apache.pinot.core.operator.blocks.FilterBlock; import org.apache.pinot.core.operator.docidsets.SortedDocIdSet; import org.apache.pinot.core.operator.filter.predicate.PredicateEvaluator; import org.apache.pinot.core.operator.filter.predicate.RangePredicateEvaluatorFactory.SortedDictionaryBasedRangePredicateEvaluator; @@ -39,16 +39,16 @@ public class SortedIndexBasedFilterOperator extends BaseFilterOperator { private final PredicateEvaluator _predicateEvaluator; private final SortedIndexReader _sortedIndexReader; - private final int _numDocs; - SortedIndexBasedFilterOperator(PredicateEvaluator predicateEvaluator, DataSource dataSource, int numDocs) { + SortedIndexBasedFilterOperator(PredicateEvaluator predicateEvaluator, DataSource dataSource, int numDocs, + boolean nullHandlingEnabled) { + super(numDocs, nullHandlingEnabled); _predicateEvaluator = predicateEvaluator; _sortedIndexReader = (SortedIndexReader) dataSource.getInvertedIndex(); - _numDocs = numDocs; } @Override - protected FilterBlock getNextBlock() { + protected BlockDocIdSet getTrues() { // At this point, we need to create a list of matching docIdRanges. // // There are two kinds of operators: @@ -63,7 +63,7 @@ protected FilterBlock getNextBlock() { int startDocId = _sortedIndexReader.getDocIds(rangePredicateEvaluator.getStartDictId()).getLeft(); // NOTE: End dictionary id is exclusive in OfflineDictionaryBasedRangePredicateEvaluator. int endDocId = _sortedIndexReader.getDocIds(rangePredicateEvaluator.getEndDictId() - 1).getRight(); - return new FilterBlock(new SortedDocIdSet(Collections.singletonList(new IntPair(startDocId, endDocId)))); + return new SortedDocIdSet(Collections.singletonList(new IntPair(startDocId, endDocId))); } else { boolean exclusive = _predicateEvaluator.isExclusive(); int[] dictIds = @@ -84,9 +84,9 @@ protected FilterBlock getNextBlock() { if (lastDocId < _numDocs - 1) { docIdRanges.add(new IntPair(lastDocId + 1, _numDocs - 1)); } - return new FilterBlock(new SortedDocIdSet(docIdRanges)); + return new SortedDocIdSet(docIdRanges); } else { - return new FilterBlock(new SortedDocIdSet(Collections.singletonList(docIdRange))); + return new SortedDocIdSet(Collections.singletonList(docIdRange)); } } else { // Sort the dictIds in ascending order so that their respective docIdRanges are adjacent if they are adjacent @@ -127,7 +127,7 @@ protected FilterBlock getNextBlock() { docIdRanges = invertedDocIdRanges; } - return new FilterBlock(new SortedDocIdSet(docIdRanges)); + return new SortedDocIdSet(docIdRanges); } } } diff --git a/pinot-core/src/test/java/org/apache/pinot/core/operator/filter/TestFilterOperator.java b/pinot-core/src/main/java/org/apache/pinot/core/operator/filter/TestFilterOperator.java similarity index 50% rename from pinot-core/src/test/java/org/apache/pinot/core/operator/filter/TestFilterOperator.java rename to pinot-core/src/main/java/org/apache/pinot/core/operator/filter/TestFilterOperator.java index 94526dfc51e4..3aff520c87ae 100644 --- a/pinot-core/src/test/java/org/apache/pinot/core/operator/filter/TestFilterOperator.java +++ b/pinot-core/src/main/java/org/apache/pinot/core/operator/filter/TestFilterOperator.java @@ -23,55 +23,25 @@ import org.apache.pinot.core.common.BlockDocIdIterator; import org.apache.pinot.core.common.BlockDocIdSet; import org.apache.pinot.core.common.Operator; -import org.apache.pinot.core.operator.blocks.FilterBlock; import org.apache.pinot.segment.spi.Constants; public class TestFilterOperator extends BaseFilterOperator { private static final String EXPLAIN_NAME = "FILTER_TEST"; - private final int[] _docIds; + private final int[] _trueDocIds; + private final int[] _nullDocIds; - public TestFilterOperator(int[] docIds) { - _docIds = docIds; + public TestFilterOperator(int[] trueDocIds, int[] nullDocIds, int numDocs) { + super(numDocs, true); + _trueDocIds = trueDocIds; + _nullDocIds = nullDocIds; } - @Override - protected FilterBlock getNextBlock() { - return new FilterBlock(new BlockDocIdSet() { - @Override - public BlockDocIdIterator iterator() { - return new BlockDocIdIterator() { - private final int _numDocIds = _docIds.length; - private int _nextIndex = 0; - - @Override - public int next() { - if (_nextIndex < _numDocIds) { - return _docIds[_nextIndex++]; - } else { - return Constants.EOF; - } - } - - @Override - public int advance(int targetDocId) { - while (_nextIndex < _numDocIds) { - int docId = _docIds[_nextIndex++]; - if (docId >= targetDocId) { - return docId; - } - } - return Constants.EOF; - } - }; - } - - @Override - public long getNumEntriesScannedInFilter() { - return 0L; - } - }); + public TestFilterOperator(int[] docIds, int numDocs) { + super(numDocs, false); + _trueDocIds = docIds; + _nullDocIds = new int[0]; } @Override @@ -83,4 +53,55 @@ public String toExplainString() { public List getChildOperators() { return Collections.emptyList(); } + + @Override + protected BlockDocIdSet getTrues() { + return new TestBlockDocIdSet(_trueDocIds); + } + + @Override + protected BlockDocIdSet getNulls() { + return new TestBlockDocIdSet(_nullDocIds); + } + + private static class TestBlockDocIdSet implements BlockDocIdSet { + private final int[] _docIds; + + public TestBlockDocIdSet(int[] docIds) { + _docIds = docIds; + } + + @Override + public BlockDocIdIterator iterator() { + return new BlockDocIdIterator() { + private final int _numDocIds = _docIds.length; + private int _nextIndex = 0; + + @Override + public int next() { + if (_nextIndex < _numDocIds) { + return _docIds[_nextIndex++]; + } else { + return Constants.EOF; + } + } + + @Override + public int advance(int targetDocId) { + while (_nextIndex < _numDocIds) { + int docId = _docIds[_nextIndex++]; + if (docId >= targetDocId) { + return docId; + } + } + return Constants.EOF; + } + }; + } + + @Override + public long getNumEntriesScannedInFilter() { + return 0L; + } + } } diff --git a/pinot-core/src/main/java/org/apache/pinot/core/operator/filter/TextContainsFilterOperator.java b/pinot-core/src/main/java/org/apache/pinot/core/operator/filter/TextContainsFilterOperator.java index b435d2f72ac8..d76f12d5bb68 100644 --- a/pinot-core/src/main/java/org/apache/pinot/core/operator/filter/TextContainsFilterOperator.java +++ b/pinot-core/src/main/java/org/apache/pinot/core/operator/filter/TextContainsFilterOperator.java @@ -21,8 +21,8 @@ import java.util.Collections; import java.util.List; import org.apache.pinot.common.request.context.predicate.TextContainsPredicate; +import org.apache.pinot.core.common.BlockDocIdSet; import org.apache.pinot.core.common.Operator; -import org.apache.pinot.core.operator.blocks.FilterBlock; import org.apache.pinot.core.operator.docidsets.BitmapDocIdSet; import org.apache.pinot.segment.spi.index.reader.TextIndexReader; import org.apache.pinot.spi.trace.FilterType; @@ -38,18 +38,17 @@ public class TextContainsFilterOperator extends BaseFilterOperator { private static final String EXPLAIN_NAME = "FILTER_TEXT_INDEX"; private final TextIndexReader _textIndexReader; - private final int _numDocs; private final TextContainsPredicate _predicate; public TextContainsFilterOperator(TextIndexReader textIndexReader, TextContainsPredicate predicate, int numDocs) { + super(numDocs, false); _textIndexReader = textIndexReader; _predicate = predicate; - _numDocs = numDocs; } @Override - protected FilterBlock getNextBlock() { - return new FilterBlock(new BitmapDocIdSet(_textIndexReader.getDocIds(_predicate.getValue()), _numDocs)); + protected BlockDocIdSet getTrues() { + return new BitmapDocIdSet(_textIndexReader.getDocIds(_predicate.getValue()), _numDocs); } @Override diff --git a/pinot-core/src/main/java/org/apache/pinot/core/operator/filter/TextMatchFilterOperator.java b/pinot-core/src/main/java/org/apache/pinot/core/operator/filter/TextMatchFilterOperator.java index 6a906ba836fa..5c791eb3e62c 100644 --- a/pinot-core/src/main/java/org/apache/pinot/core/operator/filter/TextMatchFilterOperator.java +++ b/pinot-core/src/main/java/org/apache/pinot/core/operator/filter/TextMatchFilterOperator.java @@ -21,8 +21,8 @@ import java.util.Collections; import java.util.List; import org.apache.pinot.common.request.context.predicate.TextMatchPredicate; +import org.apache.pinot.core.common.BlockDocIdSet; import org.apache.pinot.core.common.Operator; -import org.apache.pinot.core.operator.blocks.FilterBlock; import org.apache.pinot.core.operator.docidsets.BitmapDocIdSet; import org.apache.pinot.segment.spi.index.reader.TextIndexReader; import org.apache.pinot.spi.trace.FilterType; @@ -43,14 +43,16 @@ public class TextMatchFilterOperator extends BaseFilterOperator { private final TextMatchPredicate _predicate; public TextMatchFilterOperator(TextIndexReader textIndexReader, TextMatchPredicate predicate, int numDocs) { + // This filter operator does not support AND/OR/NOT operations. + super(0, false); _textIndexReader = textIndexReader; _predicate = predicate; _numDocs = numDocs; } @Override - protected FilterBlock getNextBlock() { - return new FilterBlock(new BitmapDocIdSet(_textIndexReader.getDocIds(_predicate.getValue()), _numDocs)); + protected BlockDocIdSet getTrues() { + return new BitmapDocIdSet(_textIndexReader.getDocIds(_predicate.getValue()), _numDocs); } @Override diff --git a/pinot-core/src/main/java/org/apache/pinot/core/startree/operator/StarTreeFilterOperator.java b/pinot-core/src/main/java/org/apache/pinot/core/startree/operator/StarTreeFilterOperator.java index f46f7fca53ae..fabd34cfd5f4 100644 --- a/pinot-core/src/main/java/org/apache/pinot/core/startree/operator/StarTreeFilterOperator.java +++ b/pinot-core/src/main/java/org/apache/pinot/core/startree/operator/StarTreeFilterOperator.java @@ -33,9 +33,9 @@ import java.util.Set; import javax.annotation.Nullable; import org.apache.pinot.common.utils.config.QueryOptionsUtils; +import org.apache.pinot.core.common.BlockDocIdSet; import org.apache.pinot.core.common.Operator; -import org.apache.pinot.core.operator.blocks.EmptyFilterBlock; -import org.apache.pinot.core.operator.blocks.FilterBlock; +import org.apache.pinot.core.operator.docidsets.EmptyDocIdSet; import org.apache.pinot.core.operator.filter.BaseFilterOperator; import org.apache.pinot.core.operator.filter.BitmapBasedFilterOperator; import org.apache.pinot.core.operator.filter.EmptyFilterOperator; @@ -115,6 +115,8 @@ private static class StarTreeResult { public StarTreeFilterOperator(QueryContext queryContext, StarTreeV2 starTreeV2, Map> predicateEvaluatorsMap, @Nullable Set groupByColumns) { + // This filter operator does not support AND/OR/NOT operations. + super(0, false); _queryContext = queryContext; _starTreeV2 = starTreeV2; _predicateEvaluatorsMap = predicateEvaluatorsMap; @@ -123,11 +125,11 @@ public StarTreeFilterOperator(QueryContext queryContext, StarTreeV2 starTreeV2, } @Override - protected FilterBlock getNextBlock() { + protected BlockDocIdSet getTrues() { if (_resultEmpty) { - return EmptyFilterBlock.getInstance(); + return EmptyDocIdSet.getInstance(); } - return getFilterOperator().nextBlock(); + return getFilterOperator().nextBlock().getBlockDocIdSet(); } @Override diff --git a/pinot-core/src/test/java/org/apache/pinot/core/operator/filter/AndFilterOperatorTest.java b/pinot-core/src/test/java/org/apache/pinot/core/operator/filter/AndFilterOperatorTest.java index 6481ae2256c8..69a6384ae182 100644 --- a/pinot-core/src/test/java/org/apache/pinot/core/operator/filter/AndFilterOperatorTest.java +++ b/pinot-core/src/test/java/org/apache/pinot/core/operator/filter/AndFilterOperatorTest.java @@ -19,6 +19,8 @@ package org.apache.pinot.core.operator.filter; import java.util.ArrayList; +import java.util.Arrays; +import java.util.Collections; import java.util.List; import org.apache.pinot.core.common.BlockDocIdIterator; import org.apache.pinot.segment.spi.Constants; @@ -31,13 +33,14 @@ public class AndFilterOperatorTest { @Test public void testIntersectionForTwoLists() { + int numDocs = 40; int[] docIds1 = new int[]{2, 3, 10, 15, 16, 28}; int[] docIds2 = new int[]{3, 6, 8, 20, 28}; List operators = new ArrayList<>(); - operators.add(new TestFilterOperator(docIds1)); - operators.add(new TestFilterOperator(docIds2)); - AndFilterOperator andOperator = new AndFilterOperator(operators); + operators.add(new TestFilterOperator(docIds1, numDocs)); + operators.add(new TestFilterOperator(docIds2, numDocs)); + AndFilterOperator andOperator = new AndFilterOperator(operators, null, numDocs, false); BlockDocIdIterator iterator = andOperator.nextBlock().getBlockDocIdSet().iterator(); Assert.assertEquals(iterator.next(), 3); @@ -47,15 +50,16 @@ public void testIntersectionForTwoLists() { @Test public void testIntersectionForThreeLists() { + int numDocs = 40; int[] docIds1 = new int[]{2, 3, 6, 10, 15, 16, 28}; int[] docIds2 = new int[]{3, 6, 8, 20, 28}; int[] docIds3 = new int[]{1, 2, 3, 6, 30}; List operators = new ArrayList<>(); - operators.add(new TestFilterOperator(docIds1)); - operators.add(new TestFilterOperator(docIds2)); - operators.add(new TestFilterOperator(docIds3)); - AndFilterOperator andOperator = new AndFilterOperator(operators); + operators.add(new TestFilterOperator(docIds1, numDocs)); + operators.add(new TestFilterOperator(docIds2, numDocs)); + operators.add(new TestFilterOperator(docIds3, numDocs)); + AndFilterOperator andOperator = new AndFilterOperator(operators, null, numDocs, false); BlockDocIdIterator iterator = andOperator.nextBlock().getBlockDocIdSet().iterator(); Assert.assertEquals(iterator.next(), 3); @@ -65,19 +69,20 @@ public void testIntersectionForThreeLists() { @Test public void testComplex() { + int numDocs = 40; int[] docIds1 = new int[]{2, 3, 6, 10, 15, 16, 28}; int[] docIds2 = new int[]{3, 6, 8, 20, 28}; int[] docIds3 = new int[]{1, 2, 3, 6, 30}; List childOperators = new ArrayList<>(); - childOperators.add(new TestFilterOperator(docIds1)); - childOperators.add(new TestFilterOperator(docIds2)); - AndFilterOperator childAndOperator = new AndFilterOperator(childOperators); + childOperators.add(new TestFilterOperator(docIds1, numDocs)); + childOperators.add(new TestFilterOperator(docIds2, numDocs)); + AndFilterOperator childAndOperator = new AndFilterOperator(childOperators, null, numDocs, false); List operators = new ArrayList<>(); operators.add(childAndOperator); - operators.add(new TestFilterOperator(docIds3)); - AndFilterOperator andOperator = new AndFilterOperator(operators); + operators.add(new TestFilterOperator(docIds3, numDocs)); + AndFilterOperator andOperator = new AndFilterOperator(operators, null, numDocs, false); BlockDocIdIterator iterator = andOperator.nextBlock().getBlockDocIdSet().iterator(); Assert.assertEquals(iterator.next(), 3); @@ -112,8 +117,8 @@ void testAndDocIdSetReordering() { numDocs)); } - AndFilterOperator andFilterOperator1 = new AndFilterOperator(childOperators1); - AndFilterOperator andFilterOperator2 = new AndFilterOperator(childOperators2); + AndFilterOperator andFilterOperator1 = new AndFilterOperator(childOperators1, null, numDocs, false); + AndFilterOperator andFilterOperator2 = new AndFilterOperator(childOperators2, null, numDocs, false); BlockDocIdIterator iterator1 = andFilterOperator1.getNextBlock().getBlockDocIdSet().iterator(); BlockDocIdIterator iterator2 = andFilterOperator2.getNextBlock().getBlockDocIdSet().iterator(); Assert.assertEquals(iterator1.next(), 0); @@ -133,19 +138,20 @@ void testAndDocIdSetReordering() { @Test public void testComplexWithOr() { + int numDocs = 40; int[] docIds1 = new int[]{2, 3, 6, 10, 15, 16, 28}; int[] docIds2 = new int[]{3, 6, 8, 20, 28}; int[] docIds3 = new int[]{1, 2, 3, 6, 30}; List childOperators = new ArrayList<>(); - childOperators.add(new TestFilterOperator(docIds3)); - childOperators.add(new TestFilterOperator(docIds2)); - OrFilterOperator childOrOperator = new OrFilterOperator(childOperators, 40); + childOperators.add(new TestFilterOperator(docIds3, numDocs)); + childOperators.add(new TestFilterOperator(docIds2, numDocs)); + OrFilterOperator childOrOperator = new OrFilterOperator(childOperators, null, numDocs, false); List operators = new ArrayList<>(); operators.add(childOrOperator); - operators.add(new TestFilterOperator(docIds1)); - AndFilterOperator andOperator = new AndFilterOperator(operators); + operators.add(new TestFilterOperator(docIds1, numDocs)); + AndFilterOperator andOperator = new AndFilterOperator(operators, null, numDocs, false); BlockDocIdIterator iterator = andOperator.nextBlock().getBlockDocIdSet().iterator(); Assert.assertEquals(iterator.next(), 2); @@ -154,4 +160,34 @@ public void testComplexWithOr() { Assert.assertEquals(iterator.next(), 28); Assert.assertEquals(iterator.next(), Constants.EOF); } + + @Test + public void testAndWithNull() { + int numDocs = 10; + int[] docIds1 = new int[]{1, 2, 3}; + int[] docIds2 = new int[]{0, 1, 2}; + int[] nullDocIds1 = new int[]{4, 5, 6}; + int[] nullDocIds2 = new int[]{3, 4, 5, 6, 7}; + + AndFilterOperator andFilterOperator = new AndFilterOperator( + Arrays.asList(new TestFilterOperator(docIds1, nullDocIds1, numDocs), + new TestFilterOperator(docIds2, nullDocIds2, numDocs)), null, numDocs, true); + + Assert.assertEquals(TestUtils.getDocIds(andFilterOperator.getTrues()), List.of(1, 2)); + Assert.assertEquals(TestUtils.getDocIds(andFilterOperator.getFalses()), List.of(0, 7, 8, 9)); + } + + @Test + public void testAndWithNullOneFilterIsEmpty() { + int numDocs = 10; + int[] docIds1 = new int[]{1, 2, 3}; + int[] nullDocIds1 = new int[]{4, 5, 6}; + + AndFilterOperator andFilterOperator = new AndFilterOperator( + Arrays.asList(new TestFilterOperator(docIds1, nullDocIds1, numDocs), EmptyFilterOperator.getInstance()), null, + numDocs, true); + + Assert.assertEquals(TestUtils.getDocIds(andFilterOperator.getTrues()), Collections.emptyList()); + Assert.assertEquals(TestUtils.getDocIds(andFilterOperator.getFalses()), List.of(0, 1, 2, 3, 4, 5, 6, 7, 8, 9)); + } } diff --git a/pinot-core/src/test/java/org/apache/pinot/core/operator/filter/FilterOperatorUtilsTest.java b/pinot-core/src/test/java/org/apache/pinot/core/operator/filter/FilterOperatorUtilsTest.java index 8eaae2096256..7e40b24fddad 100644 --- a/pinot-core/src/test/java/org/apache/pinot/core/operator/filter/FilterOperatorUtilsTest.java +++ b/pinot-core/src/test/java/org/apache/pinot/core/operator/filter/FilterOperatorUtilsTest.java @@ -41,7 +41,8 @@ public class FilterOperatorUtilsTest { private static final int NUM_DOCS = 10; private static final BaseFilterOperator EMPTY_FILTER_OPERATOR = EmptyFilterOperator.getInstance(); private static final BaseFilterOperator MATCH_ALL_FILTER_OPERATOR = new MatchAllFilterOperator(NUM_DOCS); - private static final BaseFilterOperator REGULAR_FILTER_OPERATOR = new TestFilterOperator(new int[]{1, 4, 7}); + private static final BaseFilterOperator REGULAR_FILTER_OPERATOR = + new TestFilterOperator(new int[]{1, 4, 7}, NUM_DOCS); @Test public void testGetAndFilterOperator() { @@ -123,8 +124,8 @@ public static Object[][] priorities() { H3InclusionIndexFilterOperator h3Inclusion = mock(H3InclusionIndexFilterOperator.class); AndFilterOperator andFilterOperator = mock(AndFilterOperator.class); OrFilterOperator orFilterOperator = mock(OrFilterOperator.class); - NotFilterOperator notWithHighPriority = new NotFilterOperator(sorted, NUM_DOCS); - NotFilterOperator notWithLowPriority = new NotFilterOperator(orFilterOperator, NUM_DOCS); + NotFilterOperator notWithHighPriority = new NotFilterOperator(sorted, NUM_DOCS, false); + NotFilterOperator notWithLowPriority = new NotFilterOperator(orFilterOperator, NUM_DOCS, false); ExpressionFilterOperator expression = mock(ExpressionFilterOperator.class); BaseFilterOperator unknown = mock(BaseFilterOperator.class); @@ -132,8 +133,7 @@ public static Object[][] priorities() { MockedPrioritizedFilterOperator prioritizedBetweenSortedAndBitmap = mock(MockedPrioritizedFilterOperator.class); OptionalInt betweenSortedAndBitmapPriority = OptionalInt.of((PrioritizedFilterOperator.HIGH_PRIORITY + PrioritizedFilterOperator.MEDIUM_PRIORITY) / 2); - when(prioritizedBetweenSortedAndBitmap.getPriority()) - .thenReturn(betweenSortedAndBitmapPriority); + when(prioritizedBetweenSortedAndBitmap.getPriority()).thenReturn(betweenSortedAndBitmapPriority); MockedPrioritizedFilterOperator notPrioritized = mock(MockedPrioritizedFilterOperator.class); when(prioritizedBetweenSortedAndBitmap.getPriority()) @@ -186,5 +186,9 @@ private void assertOrder(BaseFilterOperator first, BaseFilterOperator second) { private static abstract class MockedPrioritizedFilterOperator extends BaseFilterOperator implements PrioritizedFilterOperator { + public MockedPrioritizedFilterOperator() { + // This filter operator does not support AND/OR/NOT operations. + super(0, false); + } } } diff --git a/pinot-core/src/test/java/org/apache/pinot/core/operator/filter/NotFilterOperatorTest.java b/pinot-core/src/test/java/org/apache/pinot/core/operator/filter/NotFilterOperatorTest.java index 893389bb9bbc..b0f8999066d9 100644 --- a/pinot-core/src/test/java/org/apache/pinot/core/operator/filter/NotFilterOperatorTest.java +++ b/pinot-core/src/test/java/org/apache/pinot/core/operator/filter/NotFilterOperatorTest.java @@ -19,8 +19,10 @@ package org.apache.pinot.core.operator.filter; import java.util.Arrays; +import java.util.Collections; import java.util.HashSet; import java.util.Iterator; +import java.util.List; import java.util.Set; import org.apache.pinot.core.common.BlockDocIdIterator; import org.apache.pinot.segment.spi.Constants; @@ -36,11 +38,34 @@ public void testNotOperator() { Set expectedResult = new HashSet(); expectedResult.addAll(Arrays.asList(0, 1, 4, 5, 6, 7, 8, 9, 11, 12, 13, 14, 19, 20, 25, 27, 29)); Iterator expectedIterator = expectedResult.iterator(); - NotFilterOperator notFilterOperator = new NotFilterOperator(new TestFilterOperator(docIds1), 30); + NotFilterOperator notFilterOperator = new NotFilterOperator(new TestFilterOperator(docIds1, 30), 30, false); BlockDocIdIterator iterator = notFilterOperator.nextBlock().getBlockDocIdSet().iterator(); int docId; while ((docId = iterator.next()) != Constants.EOF) { Assert.assertEquals(docId, expectedIterator.next().intValue()); } } + + @Test + public void testNotWithNull() { + int numDocs = 10; + int[] docIds = new int[]{0, 1, 2, 3}; + int[] nullDocIds = new int[]{4, 5, 6}; + + NotFilterOperator notFilterOperator = + new NotFilterOperator(new TestFilterOperator(docIds, nullDocIds, numDocs), numDocs, true); + + Assert.assertEquals(TestUtils.getDocIds(notFilterOperator.getTrues()), List.of(7, 8, 9)); + Assert.assertEquals(TestUtils.getDocIds(notFilterOperator.getFalses()), List.of(0, 1, 2, 3)); + } + + @Test + public void testNotEmptyFilterOperator() { + int numDocs = 5; + + NotFilterOperator notFilterOperator = new NotFilterOperator(EmptyFilterOperator.getInstance(), numDocs, true); + + Assert.assertEquals(TestUtils.getDocIds(notFilterOperator.getTrues()), List.of(0, 1, 2, 3, 4)); + Assert.assertEquals(TestUtils.getDocIds(notFilterOperator.getFalses()), Collections.emptyList()); + } } diff --git a/pinot-core/src/test/java/org/apache/pinot/core/operator/filter/OrFilterOperatorTest.java b/pinot-core/src/test/java/org/apache/pinot/core/operator/filter/OrFilterOperatorTest.java index c16b4789cf20..400d75a47d3a 100644 --- a/pinot-core/src/test/java/org/apache/pinot/core/operator/filter/OrFilterOperatorTest.java +++ b/pinot-core/src/test/java/org/apache/pinot/core/operator/filter/OrFilterOperatorTest.java @@ -34,6 +34,7 @@ public class OrFilterOperatorTest { @Test public void testUnionForTwoLists() { + int numDocs = 40; int[] docIds1 = new int[]{2, 3, 10, 15, 16, 28}; int[] docIds2 = new int[]{3, 6, 8, 20, 28}; TreeSet treeSet = new TreeSet<>(); @@ -42,9 +43,9 @@ public void testUnionForTwoLists() { Iterator expectedIterator = treeSet.iterator(); List operators = new ArrayList<>(); - operators.add(new TestFilterOperator(docIds1)); - operators.add(new TestFilterOperator(docIds2)); - OrFilterOperator orOperator = new OrFilterOperator(operators, 40); + operators.add(new TestFilterOperator(docIds1, numDocs)); + operators.add(new TestFilterOperator(docIds2, numDocs)); + OrFilterOperator orOperator = new OrFilterOperator(operators, null, numDocs, false); BlockDocIdIterator iterator = orOperator.nextBlock().getBlockDocIdSet().iterator(); int docId; @@ -55,6 +56,7 @@ public void testUnionForTwoLists() { @Test public void testUnionForThreeLists() { + int numDocs = 40; int[] docIds1 = new int[]{2, 3, 6, 10, 15, 16, 28}; int[] docIds2 = new int[]{3, 6, 8, 20, 28}; int[] docIds3 = new int[]{1, 2, 3, 6, 30}; @@ -65,10 +67,10 @@ public void testUnionForThreeLists() { Iterator expectedIterator = treeSet.iterator(); List operators = new ArrayList<>(); - operators.add(new TestFilterOperator(docIds1)); - operators.add(new TestFilterOperator(docIds2)); - operators.add(new TestFilterOperator(docIds3)); - OrFilterOperator orOperator = new OrFilterOperator(operators, 40); + operators.add(new TestFilterOperator(docIds1, numDocs)); + operators.add(new TestFilterOperator(docIds2, numDocs)); + operators.add(new TestFilterOperator(docIds3, numDocs)); + OrFilterOperator orOperator = new OrFilterOperator(operators, null, numDocs, false); BlockDocIdIterator iterator = orOperator.nextBlock().getBlockDocIdSet().iterator(); int docId; @@ -79,6 +81,7 @@ public void testUnionForThreeLists() { @Test public void testComplex() { + int numDocs = 40; int[] docIds1 = new int[]{2, 3, 6, 10, 15, 16, 28}; int[] docIds2 = new int[]{3, 6, 8, 20, 28}; int[] docIds3 = new int[]{1, 2, 3, 6, 30}; @@ -89,14 +92,14 @@ public void testComplex() { Iterator expectedIterator = treeSet.iterator(); List childOperators = new ArrayList<>(); - childOperators.add(new TestFilterOperator(docIds1)); - childOperators.add(new TestFilterOperator(docIds2)); - OrFilterOperator childOrOperator = new OrFilterOperator(childOperators, 40); + childOperators.add(new TestFilterOperator(docIds1, numDocs)); + childOperators.add(new TestFilterOperator(docIds2, numDocs)); + OrFilterOperator childOrOperator = new OrFilterOperator(childOperators, null, numDocs, false); List operators = new ArrayList<>(); operators.add(childOrOperator); - operators.add(new TestFilterOperator(docIds3)); - OrFilterOperator orOperator = new OrFilterOperator(operators, 40); + operators.add(new TestFilterOperator(docIds3, numDocs)); + OrFilterOperator orOperator = new OrFilterOperator(operators, null, numDocs, false); BlockDocIdIterator iterator = orOperator.nextBlock().getBlockDocIdSet().iterator(); int docId; @@ -104,4 +107,34 @@ public void testComplex() { Assert.assertEquals(docId, expectedIterator.next().intValue()); } } + + @Test + public void testOrWithNull() { + int numDocs = 10; + int[] docIds1 = new int[]{1, 2, 3}; + int[] docIds2 = new int[]{0, 1, 2}; + int[] nullDocIds1 = new int[]{4, 5, 6}; + int[] nullDocIds2 = new int[]{3, 4, 5, 6, 7}; + + OrFilterOperator orFilterOperator = new OrFilterOperator( + Arrays.asList(new TestFilterOperator(docIds1, nullDocIds1, numDocs), + new TestFilterOperator(docIds2, nullDocIds2, numDocs)), null, numDocs, true); + + Assert.assertEquals(TestUtils.getDocIds(orFilterOperator.getTrues()), List.of(0, 1, 2, 3)); + Assert.assertEquals(TestUtils.getDocIds(orFilterOperator.getFalses()), List.of(8, 9)); + } + + @Test + public void testOrWithNullOneFilterIsEmpty() { + int numDocs = 10; + int[] docIds1 = new int[]{1, 2, 3}; + int[] nullDocIds1 = new int[]{4, 5, 6}; + + OrFilterOperator orFilterOperator = new OrFilterOperator( + Arrays.asList(new TestFilterOperator(docIds1, nullDocIds1, numDocs), EmptyFilterOperator.getInstance()), null, + numDocs, true); + + Assert.assertEquals(TestUtils.getDocIds(orFilterOperator.getTrues()), Arrays.asList(1, 2, 3)); + Assert.assertEquals(TestUtils.getDocIds(orFilterOperator.getFalses()), Arrays.asList(0, 7, 8, 9)); + } } diff --git a/pinot-core/src/test/java/org/apache/pinot/core/operator/filter/TestUtils.java b/pinot-core/src/test/java/org/apache/pinot/core/operator/filter/TestUtils.java new file mode 100644 index 000000000000..061d542e54ee --- /dev/null +++ b/pinot-core/src/test/java/org/apache/pinot/core/operator/filter/TestUtils.java @@ -0,0 +1,42 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.pinot.core.operator.filter; + +import java.util.ArrayList; +import java.util.List; +import org.apache.pinot.core.common.BlockDocIdIterator; +import org.apache.pinot.core.common.BlockDocIdSet; +import org.apache.pinot.segment.spi.Constants; + + +public class TestUtils { + private TestUtils() { + } + + public static List getDocIds(BlockDocIdSet blockDocIdSet) { + BlockDocIdIterator iterator = blockDocIdSet.iterator(); + List docIds = new ArrayList<>(); + int curr = iterator.next(); + while (curr != Constants.EOF) { + docIds.add(curr); + curr = iterator.next(); + } + return docIds; + } +} diff --git a/pinot-perf/src/main/java/org/apache/pinot/perf/BenchmarkAndDocIdIterator.java b/pinot-perf/src/main/java/org/apache/pinot/perf/BenchmarkAndDocIdIterator.java index 6982dc597b00..eba57682814f 100644 --- a/pinot-perf/src/main/java/org/apache/pinot/perf/BenchmarkAndDocIdIterator.java +++ b/pinot-perf/src/main/java/org/apache/pinot/perf/BenchmarkAndDocIdIterator.java @@ -61,7 +61,8 @@ public static void main(String[] args) @OutputTimeUnit(TimeUnit.MILLISECONDS) public void benchAndFilterOperator(MyState myState, Blackhole bh) { for (int i = 0; i < 100; i++) { - bh.consume(new AndFilterOperator(myState._childOperators).nextBlock().getBlockDocIdSet().iterator()); + bh.consume(new AndFilterOperator(myState._childOperators, null, NUM_DOCS, false).nextBlock().getBlockDocIdSet() + .iterator()); } } @@ -70,7 +71,9 @@ public void benchAndFilterOperator(MyState myState, Blackhole bh) { @OutputTimeUnit(TimeUnit.MILLISECONDS) public void benchAndFilterOperatorDegenerate(MyState myState, Blackhole bh) { for (int i = 0; i < 100; i++) { - bh.consume(new AndFilterOperator(myState._childOperatorsNoOrdering).nextBlock().getBlockDocIdSet().iterator()); + bh.consume( + new AndFilterOperator(myState._childOperatorsNoOrdering, null, NUM_DOCS, false).nextBlock().getBlockDocIdSet() + .iterator()); } } diff --git a/pinot-perf/src/main/java/org/apache/pinot/perf/RawIndexBenchmark.java b/pinot-perf/src/main/java/org/apache/pinot/perf/RawIndexBenchmark.java index 13e9f42e181d..e840238b3730 100644 --- a/pinot-perf/src/main/java/org/apache/pinot/perf/RawIndexBenchmark.java +++ b/pinot-perf/src/main/java/org/apache/pinot/perf/RawIndexBenchmark.java @@ -223,7 +223,8 @@ private void compareLookups(IndexSegment segment) { * @return Time take in millis for the lookups */ private long profileLookups(IndexSegment segment, String column, int[] docIds) { - BaseFilterOperator filterOperator = new TestFilterOperator(docIds); + BaseFilterOperator filterOperator = + new TestFilterOperator(docIds, segment.getDataSource(column).getDataSourceMetadata().getNumDocs()); DocIdSetOperator docIdSetOperator = new DocIdSetOperator(filterOperator, DocIdSetPlanNode.MAX_DOC_PER_CALL); ProjectionOperator projectionOperator = new ProjectionOperator(buildDataSourceMap(segment), docIdSetOperator);