Skip to content

Commit

Permalink
update ARRAY_OVERLAP to plan with ArrayContainsElement for ARRAY colu…
Browse files Browse the repository at this point in the history
…mns (apache#15451)

Updates ARRAY_OVERLAP to use the same ArrayContainsElement filter added in apache#15366 when filtering ARRAY typed columns so that it can also use indexes like ARRAY_CONTAINS.
  • Loading branch information
clintropolis authored and ythorat2 committed Dec 1, 2023
1 parent 965aa8c commit 06717cd
Show file tree
Hide file tree
Showing 6 changed files with 96 additions and 28 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -200,8 +200,9 @@ public String getFormatString()
"SELECT TIME_SHIFT(MILLIS_TO_TIMESTAMP(long4), 'PT1H', 1), string2, SUM(long1 * double4) FROM foo GROUP BY 1,2 ORDER BY 3",
// 37: time shift + expr agg (group by), uniform distribution high cardinality
"SELECT TIME_SHIFT(MILLIS_TO_TIMESTAMP(long5), 'PT1H', 1), string2, SUM(long1 * double4) FROM foo GROUP BY 1,2 ORDER BY 3",
// 38: array filtering
"SELECT string1, long1 FROM foo WHERE ARRAY_CONTAINS(\"multi-string3\", 100) GROUP BY 1,2"
// 38,39: array element filtering
"SELECT string1, long1 FROM foo WHERE ARRAY_CONTAINS(\"multi-string3\", 100) GROUP BY 1,2",
"SELECT string1, long1 FROM foo WHERE ARRAY_OVERLAP(\"multi-string3\", ARRAY[100, 200]) GROUP BY 1,2"
);

@Param({"5000000"})
Expand Down Expand Up @@ -260,7 +261,8 @@ public String getFormatString()
"35",
"36",
"37",
"38"
"38",
"39"
})
private String query;

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -366,6 +366,11 @@ public SimpleExtraction getSimpleExtraction()
return Preconditions.checkNotNull(simpleExtraction);
}

public boolean isArray()
{
return druidType != null && druidType.isArray();
}

/**
* Get sub {@link DruidExpression} arguments of this expression
*/
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -100,7 +100,7 @@ public DimFilter toDruidFilter(
// if the input column is not actually an ARRAY type, but rather an MVD, we can optimize this into
// selector/equality filters on the individual array elements
if (leftExpr.isSimpleExtraction()
&& !isArray(leftExpr)
&& !leftExpr.isArray()
&& (plannerContext.isUseBoundsAndSelectors() || leftExpr.isDirectColumnAccess())) {
Expr expr = plannerContext.parseExpression(rightExpr.getExpression());
// To convert this expression filter into an And of Selector filters, we need to extract all array elements.
Expand Down Expand Up @@ -142,9 +142,9 @@ public DimFilter toDruidFilter(
}
}
// if the input is a direct array column, we can use sweet array filter
if (leftExpr.isDirectColumnAccess() && isArray(leftExpr)) {
if (leftExpr.isDirectColumnAccess() && leftExpr.isArray()) {
Expr expr = plannerContext.parseExpression(rightExpr.getExpression());
// To convert this expression filter into an And of ArrayContainsElement filters, we need to extract all array
// To convert this expression filter into an AND of ArrayContainsElement filters, we need to extract all array
// elements. For now, we can optimize only when rightExpr is a literal because there is no way to extract the
// array elements by traversing the Expr. Note that all implementations of Expr are defined as package-private
// classes in a different package.
Expand All @@ -154,6 +154,11 @@ public DimFilter toDruidFilter(
ExprEval<?> exprEval = expr.eval(InputBindings.nilBindings());
if (exprEval.isArray()) {
final Object[] arrayElements = exprEval.asArray();
if (arrayElements.length == 0) {
// this isn't likely possible today because array constructor function does not accept empty argument list
// but just in case, return null
return null;
}
final List<DimFilter> filters = new ArrayList<>(arrayElements.length);
final ColumnType elementType = ExpressionType.toColumnType(ExpressionType.elementType(exprEval.type()));
for (final Object val : arrayElements) {
Expand All @@ -180,9 +185,4 @@ public DimFilter toDruidFilter(
}
return toExpressionFilter(plannerContext, getDruidFunctionName(), druidExpressions);
}

private static boolean isArray(final DruidExpression expr)
{
return expr.getDruidType() != null && expr.getDruidType().isArray();
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -30,9 +30,12 @@
import org.apache.druid.math.expr.ExprEval;
import org.apache.druid.math.expr.ExpressionType;
import org.apache.druid.math.expr.InputBindings;
import org.apache.druid.query.filter.ArrayContainsElementFilter;
import org.apache.druid.query.filter.DimFilter;
import org.apache.druid.query.filter.EqualityFilter;
import org.apache.druid.query.filter.InDimFilter;
import org.apache.druid.query.filter.OrDimFilter;
import org.apache.druid.segment.column.ColumnType;
import org.apache.druid.segment.column.RowSignature;
import org.apache.druid.sql.calcite.expression.DruidExpression;
import org.apache.druid.sql.calcite.expression.Expressions;
Expand All @@ -41,6 +44,7 @@
import org.apache.druid.sql.calcite.rel.VirtualColumnRegistry;

import javax.annotation.Nullable;
import java.util.ArrayList;
import java.util.List;

public class ArrayOverlapOperatorConversion extends BaseExpressionDimFilterOperatorConversion
Expand Down Expand Up @@ -90,29 +94,29 @@ public DimFilter toDruidFilter(
}

// Converts array_overlaps() function into an OR of Selector filters if possible.
final DruidExpression leftExpression = druidExpressions.get(0);
final DruidExpression rightExpression = druidExpressions.get(1);
final boolean leftSimpleExtractionExpr = leftExpression.isSimpleExtraction();
final boolean leftArrayColumn = leftExpression.isDirectColumnAccess() && leftExpression.getDruidType() != null && leftExpression.getDruidType().isArray();
final boolean rightSimpleExtractionExpr = rightExpression.isSimpleExtraction();
final boolean rightArrayColumn = rightExpression.isDirectColumnAccess() && rightExpression.getDruidType() != null && rightExpression.getDruidType().isArray();
final DruidExpression leftExpr = druidExpressions.get(0);
final DruidExpression rightExpr = druidExpressions.get(1);
final boolean leftSimpleExtractionExpr = leftExpr.isSimpleExtraction();
final boolean rightSimpleExtractionExpr = rightExpr.isSimpleExtraction();
final DruidExpression simpleExtractionExpr;
final DruidExpression complexExpr;

if (leftSimpleExtractionExpr ^ rightSimpleExtractionExpr && !(leftArrayColumn || rightArrayColumn)) {
if (leftSimpleExtractionExpr ^ rightSimpleExtractionExpr) {
if (leftSimpleExtractionExpr) {
simpleExtractionExpr = leftExpression;
complexExpr = rightExpression;
simpleExtractionExpr = leftExpr;
complexExpr = rightExpr;
} else {
simpleExtractionExpr = rightExpression;
complexExpr = leftExpression;
simpleExtractionExpr = rightExpr;
complexExpr = leftExpr;
}
} else {
return toExpressionFilter(plannerContext, getDruidFunctionName(), druidExpressions);
}

Expr expr = plannerContext.parseExpression(complexExpr.getExpression());
if (expr.isLiteral() && (plannerContext.isUseBoundsAndSelectors() || simpleExtractionExpr.isDirectColumnAccess())) {
final Expr expr = plannerContext.parseExpression(complexExpr.getExpression());
if (expr.isLiteral()
&& !simpleExtractionExpr.isArray()
&& (plannerContext.isUseBoundsAndSelectors() || simpleExtractionExpr.isDirectColumnAccess())) {
// Evaluate the expression to take out the array elements.
// We can safely pass null if the expression is literal.
ExprEval<?> exprEval = expr.eval(InputBindings.nilBindings());
Expand Down Expand Up @@ -149,6 +153,47 @@ public DimFilter toDruidFilter(
);
}
}

// if the input is a direct array column, we can use sweet array filter
if (simpleExtractionExpr.isDirectColumnAccess() && simpleExtractionExpr.isArray()) {
// To convert this expression filter into an OR of ArrayContainsElement filters, we need to extract all array
// elements.
if (expr.isLiteral()) {
// Evaluate the expression to get out the array elements.
// We can safely pass a nil ObjectBinding if the expression is literal.
ExprEval<?> exprEval = expr.eval(InputBindings.nilBindings());
if (exprEval.isArray()) {
final Object[] arrayElements = exprEval.asArray();
if (arrayElements.length == 0) {
// this isn't likely possible today because array constructor function does not accept empty argument list
// but just in case, return null
return null;
}
final List<DimFilter> filters = new ArrayList<>(arrayElements.length);
final ColumnType elementType = ExpressionType.toColumnType(ExpressionType.elementType(exprEval.type()));
for (final Object val : arrayElements) {
filters.add(
new ArrayContainsElementFilter(
leftExpr.getSimpleExtraction().getColumn(),
elementType,
val,
null
)
);
}

return filters.size() == 1 ? filters.get(0) : new OrDimFilter(filters);
} else {
return new ArrayContainsElementFilter(
leftExpr.getSimpleExtraction().getColumn(),
ExpressionType.toColumnType(exprEval.type()),
exprEval.valueOrDefault(),
null
);
}
}
}

return toExpressionFilter(plannerContext, getDruidFunctionName(), druidExpressions);
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -860,7 +860,12 @@ public void testArrayOverlapFilterStringArrayColumn()
newScanQueryBuilder()
.dataSource(DATA_SOURCE_ARRAYS)
.intervals(querySegmentSpec(Filtration.eternity()))
.filters(expressionFilter("array_overlap(\"arrayStringNulls\",array('a','b'))"))
.filters(
or(
new ArrayContainsElementFilter("arrayStringNulls", ColumnType.STRING, "a", null),
new ArrayContainsElementFilter("arrayStringNulls", ColumnType.STRING, "b", null)
)
)
.columns("arrayStringNulls")
.resultFormat(ScanQuery.ResultFormat.RESULT_FORMAT_COMPACTED_LIST)
.limit(5)
Expand All @@ -886,7 +891,12 @@ public void testArrayOverlapFilterLongArrayColumn()
newScanQueryBuilder()
.dataSource(DATA_SOURCE_ARRAYS)
.intervals(querySegmentSpec(Filtration.eternity()))
.filters(expressionFilter("array_overlap(\"arrayLongNulls\",array(1,2))"))
.filters(
or(
new ArrayContainsElementFilter("arrayLongNulls", ColumnType.LONG, 1L, null),
new ArrayContainsElementFilter("arrayLongNulls", ColumnType.LONG, 2L, null)
)
)
.columns("arrayLongNulls")
.resultFormat(ScanQuery.ResultFormat.RESULT_FORMAT_COMPACTED_LIST)
.limit(5)
Expand All @@ -912,7 +922,12 @@ public void testArrayOverlapFilterDoubleArrayColumn()
newScanQueryBuilder()
.dataSource(DATA_SOURCE_ARRAYS)
.intervals(querySegmentSpec(Filtration.eternity()))
.filters(expressionFilter("array_overlap(\"arrayDoubleNulls\",array(1.1,2.2))"))
.filters(
or(
new ArrayContainsElementFilter("arrayDoubleNulls", ColumnType.DOUBLE, 1.1, null),
new ArrayContainsElementFilter("arrayDoubleNulls", ColumnType.DOUBLE, 2.2, null)
)
)
.columns("arrayDoubleNulls")
.resultFormat(ScanQuery.ResultFormat.RESULT_FORMAT_COMPACTED_LIST)
.limit(5)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -1563,7 +1563,8 @@ public void testGroupByRootSingleTypeArrayLongNullsFilteredMore()
.setDimFilter(
or(
new ArrayContainsElementFilter("arrayLongNulls", ColumnType.LONG, 1L, null),
expressionFilter("array_overlap(\"arrayLongNulls\",array(2,3))")
new ArrayContainsElementFilter("arrayLongNulls", ColumnType.LONG, 2L, null),
new ArrayContainsElementFilter("arrayLongNulls", ColumnType.LONG, 3L, null)
)
)
.setAggregatorSpecs(aggregators(new LongSumAggregatorFactory("a0", "cnt")))
Expand Down

0 comments on commit 06717cd

Please sign in to comment.