Skip to content

Commit

Permalink
[CALCITE-6836] Add Rule to convert INTERSECT to EXISTS
Browse files Browse the repository at this point in the history
  • Loading branch information
xiedeyantu committed Feb 27, 2025
1 parent 51a6ed9 commit c471930
Show file tree
Hide file tree
Showing 4 changed files with 335 additions and 0 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
*/
package org.apache.calcite.rel.rules;

import org.apache.calcite.plan.RelOptUtil.Exists;
import org.apache.calcite.rel.RelNode;
import org.apache.calcite.rel.core.Aggregate;
import org.apache.calcite.rel.core.Calc;
Expand Down Expand Up @@ -348,6 +349,11 @@ private CoreRules() {}
public static final IntersectToDistinctRule INTERSECT_TO_DISTINCT =
IntersectToDistinctRule.Config.DEFAULT.toRule();

/** Rule that translates an {@link Intersect}
 * into an {@link Exists} subquery.
 *
 * <p>The instance is built from {@code IntersectToExistsRule.Config.DEFAULT}. */
public static final IntersectToExistsRule INTERSECT_TO_EXISTS =
    IntersectToExistsRule.Config.DEFAULT.toRule();

/** Rule that translates a distinct
* {@link Minus} into a group of operators
* composed of {@link Union}, {@link Aggregate}, etc. */
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,146 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to you under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.calcite.rel.rules;

import org.apache.calcite.plan.RelOptRuleCall;
import org.apache.calcite.plan.RelOptUtil.Exists;
import org.apache.calcite.plan.RelRule;
import org.apache.calcite.rel.RelNode;
import org.apache.calcite.rel.core.CorrelationId;
import org.apache.calcite.rel.core.Intersect;
import org.apache.calcite.rel.logical.LogicalIntersect;
import org.apache.calcite.rel.type.RelDataTypeField;
import org.apache.calcite.rex.RexBuilder;
import org.apache.calcite.rex.RexNode;
import org.apache.calcite.rex.RexSubQuery;
import org.apache.calcite.rex.RexUtil;
import org.apache.calcite.sql.fun.SqlStdOperatorTable;
import org.apache.calcite.tools.RelBuilder;
import org.apache.calcite.tools.RelBuilderFactory;
import org.apache.calcite.util.ImmutableBitSet;

import com.google.common.collect.ImmutableSet;

import org.immutables.value.Value;

import java.util.ArrayList;
import java.util.List;
import java.util.stream.Collectors;

/**
 * Planner rule that translates an {@link Intersect}
 * (<code>all</code> = <code>false</code>)
 * into an {@link Exists} sub-query.
 *
 * <p>The first input of the intersect is kept as the outer relation. For each
 * further input, a filter with a correlated {@code EXISTS} predicate is placed
 * on top of the relation built so far; the predicate compares every column of
 * the two sides with {@code IS NOT DISTINCT FROM}. The final result is made
 * distinct.
 *
 * <p>An {@code INTERSECT ALL} (<code>all</code> = <code>true</code>) is left
 * unchanged.
 *
 * @see CoreRules#INTERSECT_TO_EXISTS
 */
@Value.Enclosing
public class IntersectToExistsRule
    extends RelRule<IntersectToExistsRule.Config>
    implements TransformationRule {

  /** Creates an IntersectToExistsRule. */
  protected IntersectToExistsRule(Config config) {
    super(config);
  }

  @Deprecated // to be removed before 2.0
  public IntersectToExistsRule(Class<? extends Intersect> intersectClass,
      RelBuilderFactory relBuilderFactory) {
    this(Config.DEFAULT.withRelBuilderFactory(relBuilderFactory)
        .as(Config.class)
        .withOperandFor(intersectClass));
  }

  //~ Methods ----------------------------------------------------------------

  /**
   * Rewrites the matched {@link Intersect} as a chain of filters with
   * correlated {@code EXISTS} sub-queries, followed by a distinct.
   *
   * @param call rule call whose first relational expression is the intersect
   */
  @Override public void onMatch(RelOptRuleCall call) {
    final Intersect intersect = call.rel(0);
    if (intersect.all) {
      return; // nothing we can do
    }

    final RelBuilder builder = call.builder();
    final RexBuilder rexBuilder = builder.getRexBuilder();

    // get all column indices of intersect
    final ImmutableBitSet fieldIndices =
        ImmutableBitSet.of(intersect.getRowType().getFieldList()
            .stream().map(RelDataTypeField::getIndex)
            .collect(Collectors.toList()));

    final List<RelNode> inputs = intersect.getInputs();
    RelNode current = inputs.get(0);

    // iterate over the remaining inputs and apply one exists subquery each
    for (int i = 1; i < inputs.size(); i++) {
      final RelNode nextInput = inputs.get(i);

      // The correlation variable is bound by the filter that is placed on
      // "current" below (it is passed as that filter's variable set), so it
      // stands for a row of "current" and must carry the row type of
      // "current", not the row type of the sub-query input.
      final CorrelationId correlationId =
          intersect.getCluster().createCorrel();
      final RexNode correl =
          rexBuilder.makeCorrel(current.getRowType(), correlationId);

      // create condition in exists filter, and use correlation
      final List<RexNode> conditions = new ArrayList<>();
      for (int fieldIndex : fieldIndices) {
        // The input reference is evaluated inside the sub-query, on top of
        // "nextInput", hence it takes its type from "nextInput".
        final RexNode innerField =
            rexBuilder.makeInputRef(nextInput, fieldIndex);
        final RexNode outerField =
            rexBuilder.makeFieldAccess(correl, fieldIndex);
        conditions.add(
            rexBuilder.makeCall(SqlStdOperatorTable.IS_NOT_DISTINCT_FROM,
                innerField, outerField));
      }
      final RexNode condition =
          RexUtil.composeConjunction(rexBuilder, conditions);

      // build exists subquery
      final RelNode existsSubQuery = builder.push(nextInput)
          .filter(condition)
          .project(builder.fields(fieldIndices))
          .build();

      // apply exists subquery to the current relation
      current = builder.push(current)
          .filter(ImmutableSet.of(correlationId),
              RexSubQuery.exists(existsSubQuery))
          .build();
    }

    final RelNode result = builder.push(current)
        .project(builder.fields(fieldIndices))
        .distinct()
        .build();

    call.transformTo(result);
  }

  /** Rule configuration. */
  @Value.Immutable
  public interface Config extends RelRule.Config {
    Config DEFAULT = ImmutableIntersectToExistsRule.Config.of()
        .withOperandFor(LogicalIntersect.class);

    @Override default IntersectToExistsRule toRule() {
      return new IntersectToExistsRule(this);
    }

    /** Defines an operand tree for the given classes. */
    default Config withOperandFor(Class<? extends Intersect> intersectClass) {
      return withOperandSupplier(b -> b.operand(intersectClass).anyInputs())
          .as(Config.class);
    }
  }
}
46 changes: 46 additions & 0 deletions core/src/test/java/org/apache/calcite/test/RelOptRulesTest.java
Original file line number Diff line number Diff line change
Expand Up @@ -9716,4 +9716,50 @@ private void checkJoinAssociateRuleWithTopAlwaysTrueCondition(boolean allowAlway
fixture().withRelBuilderConfig(a -> a.withBloat(-1))
.relFn(relFn).withPlanner(planner).check();
}

/**
 * Test case of
 * <a href="https://issues.apache.org/jira/browse/CALCITE-6836">[CALCITE-6836]
 * Add Rule to convert INTERSECT to EXISTS</a>.
 *
 * <p>Intersects a single column taken from two different tables. */
@Test void testIntersectToExistsRuleOneField() {
  final String query = "SELECT a.ename FROM emp AS a\n"
      + "INTERSECT\n"
      + "SELECT b.name FROM dept AS b";
  sql(query)
      .withRule(CoreRules.INTERSECT_TO_EXISTS)
      .check();
}

/** Applies {@link CoreRules#INTERSECT_TO_EXISTS} to an INTERSECT of the
 * {@code empno} column of {@code emp} with itself. */
@Test void testIntersectToExistsRulePrimaryKey() {
  final String query = "SELECT a.empno FROM emp AS a\n"
      + "INTERSECT\n"
      + "SELECT b.empno FROM emp AS b";
  sql(query)
      .withRule(CoreRules.INTERSECT_TO_EXISTS)
      .check();
}

/** Applies {@link CoreRules#INTERSECT_TO_EXISTS} to an INTERSECT over two
 * columns ({@code ename}, {@code job}). */
@Test void testIntersectToExistsRuleMultiFields() {
  final String query = "SELECT a.ename, a.job FROM emp AS a\n"
      + "INTERSECT\n"
      + "SELECT b.ename, b.job FROM emp AS b";
  sql(query)
      .withRule(CoreRules.INTERSECT_TO_EXISTS)
      .check();
}

/** Applies {@link CoreRules#INTERSECT_TO_EXISTS} to a query that chains two
 * INTERSECT operators. */
@Test void testIntersectToExistsRuleMultiIntersect() {
  final String query = "SELECT a.ename FROM emp AS a\n"
      + "INTERSECT\n"
      + "SELECT b.name FROM dept AS b\n"
      + "INTERSECT\n"
      + "SELECT c.ename FROM emp AS c";
  sql(query)
      .withRule(CoreRules.INTERSECT_TO_EXISTS)
      .check();
}

/** Checks that {@link CoreRules#INTERSECT_TO_EXISTS} leaves the plan of an
 * {@code INTERSECT ALL} query unchanged. */
@Test void testIntersectToExistsRuleWithAll() {
  final String query = "SELECT a.ename FROM emp AS a\n"
      + "INTERSECT ALL\n"
      + "SELECT b.name FROM dept AS b";
  sql(query)
      .withRule(CoreRules.INTERSECT_TO_EXISTS)
      .checkUnchanged();
}
}
137 changes: 137 additions & 0 deletions core/src/test/resources/org/apache/calcite/test/RelOptRulesTest.xml
Original file line number Diff line number Diff line change
Expand Up @@ -5840,6 +5840,143 @@ LogicalIntersect(all=[true])
LogicalProject(EMPNO=[$0], ENAME=[$1], JOB=[$2], MGR=[$3], HIREDATE=[$4], SAL=[$5], COMM=[$6], DEPTNO=[$7], SLACKER=[$8])
LogicalFilter(condition=[=($7, 30)])
LogicalTableScan(table=[[CATALOG, SALES, EMP]])
]]>
</Resource>
</TestCase>
<TestCase name="testIntersectToExistsRuleMultiFields">
<Resource name="sql">
<![CDATA[SELECT a.ename, a.job FROM emp AS a
INTERSECT
SELECT b.ename, b.job FROM emp AS b]]>
</Resource>
<Resource name="planBefore">
<![CDATA[
LogicalIntersect(all=[false])
LogicalProject(ENAME=[$1], JOB=[$2])
LogicalTableScan(table=[[CATALOG, SALES, EMP]])
LogicalProject(ENAME=[$1], JOB=[$2])
LogicalTableScan(table=[[CATALOG, SALES, EMP]])
]]>
</Resource>
<Resource name="planAfter">
<![CDATA[
LogicalAggregate(group=[{0, 1}])
LogicalFilter(condition=[EXISTS({
LogicalFilter(condition=[AND(IS NOT DISTINCT FROM($0, $cor0.ENAME), IS NOT DISTINCT FROM($1, $cor0.JOB))])
LogicalProject(ENAME=[$1], JOB=[$2])
LogicalTableScan(table=[[CATALOG, SALES, EMP]])
})], variablesSet=[[$cor0]])
LogicalProject(ENAME=[$1], JOB=[$2])
LogicalTableScan(table=[[CATALOG, SALES, EMP]])
]]>
</Resource>
</TestCase>
<TestCase name="testIntersectToExistsRuleMultiIntersect">
<Resource name="sql">
<![CDATA[SELECT a.ename FROM emp AS a
INTERSECT
SELECT b.name FROM dept AS b
INTERSECT
SELECT c.ename FROM emp AS c]]>
</Resource>
<Resource name="planBefore">
<![CDATA[
LogicalIntersect(all=[false])
LogicalIntersect(all=[false])
LogicalProject(ENAME=[$1])
LogicalTableScan(table=[[CATALOG, SALES, EMP]])
LogicalProject(NAME=[CAST($1):VARCHAR(20) NOT NULL])
LogicalTableScan(table=[[CATALOG, SALES, DEPT]])
LogicalProject(ENAME=[$1])
LogicalTableScan(table=[[CATALOG, SALES, EMP]])
]]>
</Resource>
<Resource name="planAfter">
<![CDATA[
LogicalFilter(condition=[EXISTS({
LogicalFilter(condition=[IS NOT DISTINCT FROM($0, $cor0.ENAME)])
LogicalProject(ENAME=[$1])
LogicalTableScan(table=[[CATALOG, SALES, EMP]])
})], variablesSet=[[$cor0]])
LogicalAggregate(group=[{0}])
LogicalFilter(condition=[EXISTS({
LogicalFilter(condition=[IS NOT DISTINCT FROM($0, $cor1.NAME)])
LogicalProject(NAME=[CAST($1):VARCHAR(20) NOT NULL])
LogicalTableScan(table=[[CATALOG, SALES, DEPT]])
})], variablesSet=[[$cor1]])
LogicalProject(ENAME=[$1])
LogicalTableScan(table=[[CATALOG, SALES, EMP]])
]]>
</Resource>
</TestCase>
<TestCase name="testIntersectToExistsRuleOneField">
<Resource name="sql">
<![CDATA[SELECT a.ename FROM emp AS a
INTERSECT
SELECT b.name FROM dept AS b]]>
</Resource>
<Resource name="planBefore">
<![CDATA[
LogicalIntersect(all=[false])
LogicalProject(ENAME=[$1])
LogicalTableScan(table=[[CATALOG, SALES, EMP]])
LogicalProject(NAME=[CAST($1):VARCHAR(20) NOT NULL])
LogicalTableScan(table=[[CATALOG, SALES, DEPT]])
]]>
</Resource>
<Resource name="planAfter">
<![CDATA[
LogicalAggregate(group=[{0}])
LogicalFilter(condition=[EXISTS({
LogicalFilter(condition=[IS NOT DISTINCT FROM($0, $cor0.NAME)])
LogicalProject(NAME=[CAST($1):VARCHAR(20) NOT NULL])
LogicalTableScan(table=[[CATALOG, SALES, DEPT]])
})], variablesSet=[[$cor0]])
LogicalProject(ENAME=[$1])
LogicalTableScan(table=[[CATALOG, SALES, EMP]])
]]>
</Resource>
</TestCase>
<TestCase name="testIntersectToExistsRulePrimaryKey">
<Resource name="sql">
<![CDATA[SELECT a.empno FROM emp AS a
INTERSECT
SELECT b.empno FROM emp AS b]]>
</Resource>
<Resource name="planBefore">
<![CDATA[
LogicalIntersect(all=[false])
LogicalProject(EMPNO=[$0])
LogicalTableScan(table=[[CATALOG, SALES, EMP]])
LogicalProject(EMPNO=[$0])
LogicalTableScan(table=[[CATALOG, SALES, EMP]])
]]>
</Resource>
<Resource name="planAfter">
<![CDATA[
LogicalFilter(condition=[EXISTS({
LogicalFilter(condition=[IS NOT DISTINCT FROM($0, $cor0.EMPNO)])
LogicalProject(EMPNO=[$0])
LogicalTableScan(table=[[CATALOG, SALES, EMP]])
})], variablesSet=[[$cor0]])
LogicalProject(EMPNO=[$0])
LogicalTableScan(table=[[CATALOG, SALES, EMP]])
]]>
</Resource>
</TestCase>
<TestCase name="testIntersectToExistsRuleWithAll">
<Resource name="sql">
<![CDATA[SELECT a.ename FROM emp AS a
INTERSECT ALL
SELECT b.name FROM dept AS b]]>
</Resource>
<Resource name="planBefore">
<![CDATA[
LogicalIntersect(all=[true])
LogicalProject(ENAME=[$1])
LogicalTableScan(table=[[CATALOG, SALES, EMP]])
LogicalProject(NAME=[CAST($1):VARCHAR(20) NOT NULL])
LogicalTableScan(table=[[CATALOG, SALES, DEPT]])
]]>
</Resource>
</TestCase>
Expand Down

0 comments on commit c471930

Please sign in to comment.