-
Notifications
You must be signed in to change notification settings - Fork 1
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Arithmetic/Logical/String expressions implementation #2
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -19,11 +19,102 @@ package org.apache.spark.sql | |
package catalyst | ||
package expressions | ||
|
||
import scala.util.matching.Regex | ||
|
||
import catalyst.types.StringType | ||
import catalyst.types.BooleanType | ||
import analysis.UnresolvedException | ||
import catalyst.errors.`package`.TreeNodeException | ||
|
||
|
||
abstract class BinaryString extends BinaryExpression { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This looks good, but I was initially confused by the name. Maybe There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Or maybe BinaryStringPredicate, since it always returns a boolean. |
||
self: Product => | ||
|
||
type EvaluatedType = Any | ||
|
||
def nullable = left.nullable || right.nullable | ||
|
||
override lazy val resolved = | ||
left.resolved && right.resolved && left.dataType == StringType && right.dataType == StringType | ||
|
||
def dataType = { | ||
if (!resolved) { | ||
throw new UnresolvedException(this, | ||
s"datatype. Can not resolve due to non string types ${left.dataType}, ${right.dataType}") | ||
} | ||
|
||
BooleanType | ||
} | ||
|
||
@inline | ||
protected final def s2( | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Do we really need these functions? In the case of of using Numerics you have to cast back and forth multiple times, but I think the resulting code here is actually more verbose than if you just did this inline. |
||
i: Row, | ||
e1: Expression, | ||
e2: Expression, | ||
f: ((String, String) => Boolean)): Any = { | ||
|
||
if (e1.dataType != StringType) { | ||
throw new TreeNodeException(this, s"Types do not match ${e1.dataType} != StringType") | ||
} | ||
|
||
case class Like(left: Expression, right: Expression) extends BinaryExpression { | ||
def dataType = BooleanType | ||
def nullable = left.nullable // Right cannot be null. | ||
if (e2.dataType != StringType) { | ||
throw new TreeNodeException(this, s"Types do not match ${e2.dataType} != StringType") | ||
} | ||
|
||
val evalE1 = e1.apply(i) | ||
if(evalE1 == null) { | ||
null | ||
} else { | ||
val evalE2 = e2.apply(i) | ||
if (evalE2 == null) { | ||
null | ||
} else { | ||
f.apply(evalE1.asInstanceOf[String], evalE1.asInstanceOf[String]) | ||
} | ||
} | ||
} | ||
|
||
@inline | ||
protected final def s1( | ||
i: Row, | ||
e1: Expression, | ||
f: ((String) => Boolean)): Any = { | ||
|
||
if (e1.dataType != StringType) { | ||
throw new TreeNodeException(this, s"Types do not match ${e1.dataType} != StringType") | ||
} | ||
|
||
val evalE1 = e1.apply(i) | ||
if(evalE1 == null) { | ||
null | ||
} else { | ||
f.apply(evalE1.asInstanceOf[String]) | ||
} | ||
} | ||
} | ||
|
||
case class Like(left: Expression, right: Literal) extends BinaryString { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Independent of what is required by the SQL spec, I think both of these should be Expressions. This will give us a lot more flexibility and make it easier to integrate with the parser. You can still get the string value by calling |
||
def symbol = "LIKE" | ||
// replace the _ with .{1} exactly match 1 time of any character | ||
// replace the % with .*, match 0 or more times with any character | ||
def regex(v: String) = v.replaceAll("_", ".{1}").replaceAll("%", ".*") | ||
lazy val r = regex(right.value.asInstanceOf[String]).r | ||
|
||
override def apply(input: Row): Any = if(right.value == null) { | ||
null | ||
} else { | ||
s1(input, left, r.findFirstIn(_) != None) | ||
} | ||
} | ||
|
||
case class RLike(left: Expression, right: Literal) extends BinaryString { | ||
def symbol = "RLIKE" | ||
|
||
lazy val r = right.value.asInstanceOf[String].r | ||
|
||
override def apply(input: Row): Any = if(right.value == null) { | ||
null | ||
} else { | ||
s1(input, left, r.findFirstIn(_) != None) | ||
} | ||
} |
This file was deleted.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I don't think this is doing the right thing for all data types. The string here is getting lifted to a Scala AST String Literal, which means that independent of the dataType, you are splicing a String literal into the AST.