From 0fbdb2efb2c1a6fa1a2f8e000b0ce471eca95761 Mon Sep 17 00:00:00 2001 From: Cheng Lian Date: Tue, 22 Mar 2016 19:20:56 +0800 Subject: [PATCH] [SPARK-13473][SQL] Simplifies PushPredicateThroughProject ## What changes were proposed in this pull request? This is a follow-up of PR #11348. After PR #11348, a predicate is never pushed through a project as long as the project contains any non-deterministic fields. Thus, it's impossible that the candidate filter condition can reference any non-deterministic projected fields, and related logic can be safely cleaned up. To be more specific, the following optimization is allowed: ```scala // From: df.select('a, 'b).filter('c > rand(42)) // To: df.filter('c > rand(42)).select('a, 'b) ``` while this isn't: ```scala // From: df.select('a, rand('b) as 'rb, 'c).filter('c > 'rb) // To: df.filter('c > rand('b)).select('a, rand('b) as 'rb, 'c) ``` ## How was this patch tested? Existing test cases should do the work. Author: Cheng Lian Closes #11864 from liancheng/spark-13473-cleanup. --- .../sql/catalyst/optimizer/Optimizer.scala | 24 +------------------ 1 file changed, 1 insertion(+), 23 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala index 41e8dc0f46746..0840d46e4e5ec 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala @@ -891,29 +891,7 @@ object PushPredicateThroughProject extends Rule[LogicalPlan] with PredicateHelpe case a: Alias => (a.toAttribute, a.child) }) - // Split the condition into small conditions by `And`, so that we can push down part of this - // condition without nondeterministic expressions. - val andConditions = splitConjunctivePredicates(condition) - - val (deterministic, nondeterministic) = andConditions.partition(_.collect { - case a: Attribute if aliasMap.contains(a) => aliasMap(a) - }.forall(_.deterministic)) - - // If there is no nondeterministic conditions, push down the whole condition. - if (nondeterministic.isEmpty) { - project.copy(child = Filter(replaceAlias(condition, aliasMap), grandChild)) - } else { - // If they are all nondeterministic conditions, leave it un-changed. - if (deterministic.isEmpty) { - filter - } else { - // Push down the small conditions without nondeterministic expressions. - val pushedCondition = - deterministic.map(replaceAlias(_, aliasMap)).reduce(And) - Filter(nondeterministic.reduce(And), - project.copy(child = Filter(pushedCondition, grandChild))) - } - } + project.copy(child = Filter(replaceAlias(condition, aliasMap), grandChild)) } }