Skip to content

Commit

Permalink
Revert: Move boolean simplification & remove rewrite.
Browse files Browse the repository at this point in the history
  • Loading branch information
hvanhovell committed May 7, 2016
1 parent b487925 commit e28bbb6
Show file tree
Hide file tree
Showing 9 changed files with 109 additions and 131 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@ import org.apache.spark.sql.catalyst.catalog.{InMemoryCatalog, SessionCatalog}
import org.apache.spark.sql.catalyst.encoders.OuterScopes
import org.apache.spark.sql.catalyst.expressions._
import org.apache.spark.sql.catalyst.expressions.aggregate._
import org.apache.spark.sql.catalyst.optimizer.BooleanSimplification
import org.apache.spark.sql.catalyst.planning.IntegerIndex
import org.apache.spark.sql.catalyst.plans._
import org.apache.spark.sql.catalyst.plans.logical.{LogicalPlan, _}
Expand Down

This file was deleted.

Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ import scala.collection.immutable.HashSet
import scala.collection.mutable.ArrayBuffer

import org.apache.spark.sql.catalyst.{CatalystConf, SimpleCatalystConf}
import org.apache.spark.sql.catalyst.analysis.{BooleanSimplification, CleanupAliases, DistinctAggregationRewriter, EliminateSubqueryAliases, EmptyFunctionRegistry}
import org.apache.spark.sql.catalyst.analysis.{CleanupAliases, DistinctAggregationRewriter, EliminateSubqueryAliases, EmptyFunctionRegistry}
import org.apache.spark.sql.catalyst.catalog.{InMemoryCatalog, SessionCatalog}
import org.apache.spark.sql.catalyst.expressions._
import org.apache.spark.sql.catalyst.expressions.aggregate._
Expand Down Expand Up @@ -688,6 +688,108 @@ case class OptimizeIn(conf: CatalystConf) extends Rule[LogicalPlan] {
}
}

/**
* Simplifies boolean expressions:
* 1. Simplifies expressions whose answer can be determined without evaluating both sides.
* 2. Eliminates / extracts common factors.
* 3. Merge same expressions
* 4. Removes `Not` operator.
*/
object BooleanSimplification extends Rule[LogicalPlan] with PredicateHelper {
def apply(plan: LogicalPlan): LogicalPlan = plan transform {
case q: LogicalPlan => q transformExpressionsUp {
case TrueLiteral And e => e
case e And TrueLiteral => e
case FalseLiteral Or e => e
case e Or FalseLiteral => e

case FalseLiteral And _ => FalseLiteral
case _ And FalseLiteral => FalseLiteral
case TrueLiteral Or _ => TrueLiteral
case _ Or TrueLiteral => TrueLiteral

case a And b if a.semanticEquals(b) => a
case a Or b if a.semanticEquals(b) => a

case a And (b Or c) if Not(a).semanticEquals(b) => And(a, c)
case a And (b Or c) if Not(a).semanticEquals(c) => And(a, b)
case (a Or b) And c if a.semanticEquals(Not(c)) => And(b, c)
case (a Or b) And c if b.semanticEquals(Not(c)) => And(a, c)

case a Or (b And c) if Not(a).semanticEquals(b) => Or(a, c)
case a Or (b And c) if Not(a).semanticEquals(c) => Or(a, b)
case (a And b) Or c if a.semanticEquals(Not(c)) => Or(b, c)
case (a And b) Or c if b.semanticEquals(Not(c)) => Or(a, c)

// Common factor elimination for conjunction
case and @ (left And right) =>
// 1. Split left and right to get the disjunctive predicates,
// i.e. lhs = (a, b), rhs = (a, c)
// 2. Find the common predict between lhsSet and rhsSet, i.e. common = (a)
// 3. Remove common predict from lhsSet and rhsSet, i.e. ldiff = (b), rdiff = (c)
// 4. Apply the formula, get the optimized predicate: common || (ldiff && rdiff)
val lhs = splitDisjunctivePredicates(left)
val rhs = splitDisjunctivePredicates(right)
val common = lhs.filter(e => rhs.exists(e.semanticEquals))
if (common.isEmpty) {
// No common factors, return the original predicate
and
} else {
val ldiff = lhs.filterNot(e => common.exists(e.semanticEquals))
val rdiff = rhs.filterNot(e => common.exists(e.semanticEquals))
if (ldiff.isEmpty || rdiff.isEmpty) {
// (a || b || c || ...) && (a || b) => (a || b)
common.reduce(Or)
} else {
// (a || b || c || ...) && (a || b || d || ...) =>
// ((c || ...) && (d || ...)) || a || b
(common :+ And(ldiff.reduce(Or), rdiff.reduce(Or))).reduce(Or)
}
}

// Common factor elimination for disjunction
case or @ (left Or right) =>
// 1. Split left and right to get the conjunctive predicates,
// i.e. lhs = (a, b), rhs = (a, c)
// 2. Find the common predict between lhsSet and rhsSet, i.e. common = (a)
// 3. Remove common predict from lhsSet and rhsSet, i.e. ldiff = (b), rdiff = (c)
// 4. Apply the formula, get the optimized predicate: common && (ldiff || rdiff)
val lhs = splitConjunctivePredicates(left)
val rhs = splitConjunctivePredicates(right)
val common = lhs.filter(e => rhs.exists(e.semanticEquals))
if (common.isEmpty) {
// No common factors, return the original predicate
or
} else {
val ldiff = lhs.filterNot(e => common.exists(e.semanticEquals))
val rdiff = rhs.filterNot(e => common.exists(e.semanticEquals))
if (ldiff.isEmpty || rdiff.isEmpty) {
// (a && b) || (a && b && c && ...) => a && b
common.reduce(And)
} else {
// (a && b && c && ...) || (a && b && d && ...) =>
// ((c && ...) || (d && ...)) && a && b
(common :+ Or(ldiff.reduce(And), rdiff.reduce(And))).reduce(And)
}
}

case Not(TrueLiteral) => FalseLiteral
case Not(FalseLiteral) => TrueLiteral

case Not(a GreaterThan b) => LessThanOrEqual(a, b)
case Not(a GreaterThanOrEqual b) => LessThan(a, b)

case Not(a LessThan b) => GreaterThanOrEqual(a, b)
case Not(a LessThanOrEqual b) => GreaterThan(a, b)

case Not(a Or b) => And(Not(a), Not(b))
case Not(a And b) => Or(Not(a), Not(b))

case Not(Not(e)) => e
}
}
}

/**
* Simplifies binary comparisons with semantically-equal expressions:
* 1) Replace '<=>' with 'true' literal.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,6 @@

package org.apache.spark.sql.catalyst.optimizer

import org.apache.spark.sql.catalyst.analysis.BooleanSimplification
import org.apache.spark.sql.catalyst.dsl.expressions._
import org.apache.spark.sql.catalyst.dsl.plans._
import org.apache.spark.sql.catalyst.plans.logical._
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@
package org.apache.spark.sql.catalyst.optimizer

import org.apache.spark.sql.catalyst.SimpleCatalystConf
import org.apache.spark.sql.catalyst.analysis.{BooleanSimplification, EliminateSubqueryAliases, UnresolvedExtractValue}
import org.apache.spark.sql.catalyst.analysis.{EliminateSubqueryAliases, UnresolvedExtractValue}
import org.apache.spark.sql.catalyst.dsl.expressions._
import org.apache.spark.sql.catalyst.dsl.plans._
import org.apache.spark.sql.catalyst.expressions._
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@
package org.apache.spark.sql.catalyst.optimizer

import org.apache.spark.sql.catalyst.analysis
import org.apache.spark.sql.catalyst.analysis.{BooleanSimplification, EliminateSubqueryAliases}
import org.apache.spark.sql.catalyst.analysis.EliminateSubqueryAliases
import org.apache.spark.sql.catalyst.dsl.expressions._
import org.apache.spark.sql.catalyst.dsl.plans._
import org.apache.spark.sql.catalyst.expressions._
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@
package org.apache.spark.sql.catalyst.optimizer

import org.apache.spark.sql.catalyst.analysis
import org.apache.spark.sql.catalyst.analysis.{BooleanSimplification, EliminateSubqueryAliases}
import org.apache.spark.sql.catalyst.analysis.EliminateSubqueryAliases
import org.apache.spark.sql.catalyst.dsl.expressions._
import org.apache.spark.sql.catalyst.dsl.plans._
import org.apache.spark.sql.catalyst.expressions.Expression
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@

package org.apache.spark.sql.catalyst.optimizer

import org.apache.spark.sql.catalyst.analysis.{BooleanSimplification, EliminateSubqueryAliases}
import org.apache.spark.sql.catalyst.analysis.EliminateSubqueryAliases
import org.apache.spark.sql.catalyst.dsl.expressions._
import org.apache.spark.sql.catalyst.dsl.plans._
import org.apache.spark.sql.catalyst.expressions.Add
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@
package org.apache.spark.sql.catalyst.optimizer

import org.apache.spark.sql.catalyst.SimpleCatalystConf
import org.apache.spark.sql.catalyst.analysis.{BooleanSimplification, EliminateSubqueryAliases, UnresolvedAttribute}
import org.apache.spark.sql.catalyst.analysis.{EliminateSubqueryAliases, UnresolvedAttribute}
import org.apache.spark.sql.catalyst.dsl.expressions._
import org.apache.spark.sql.catalyst.dsl.plans._
import org.apache.spark.sql.catalyst.expressions._
Expand Down

0 comments on commit e28bbb6

Please sign in to comment.