Skip to content

Commit

Permalink
Adds fixes from review:
Browse files Browse the repository at this point in the history
* orders imports in stringOperations.scala
* Substring.dataType throws exception if children are unresolved
* inlines Substring.slice (~11.5% performance improvement on microbenchmark runs)
* adds a special `toString` case for two-argument SUBSTR expressions
* removes spurious I_ prefix to SUBSTR(ING) in HiveQL.scala

Thanks to @concretevitamin for prompt and useful feedback!
  • Loading branch information
willb committed Jul 14, 2014
1 parent 4f3bfdb commit ec35c80
Show file tree
Hide file tree
Showing 2 changed files with 12 additions and 6 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -21,10 +21,9 @@ import java.util.regex.Pattern

import scala.collection.IndexedSeqOptimized

import org.apache.spark.sql.catalyst.types.DataType
import org.apache.spark.sql.catalyst.types.StringType
import org.apache.spark.sql.catalyst.types.BinaryType
import org.apache.spark.sql.catalyst.types.BooleanType

import org.apache.spark.sql.catalyst.analysis.UnresolvedException
import org.apache.spark.sql.catalyst.types.{BinaryType, BooleanType, DataType, StringType}

trait StringRegexExpression {
self: BinaryExpression =>
Expand Down Expand Up @@ -219,13 +218,17 @@ case class Substring(str: Expression, pos: Expression, len: Expression) extends

def nullable: Boolean = true
def dataType: DataType = {
if (!resolved) {
throw new UnresolvedException(this, s"Cannot resolve since $children are not resolved")
}
if (str.dataType == BinaryType) str.dataType else StringType
}

def references = children.flatMap(_.references).toSet

override def children = str :: pos :: len :: Nil

@inline
def slice[T, C <% IndexedSeqOptimized[T,_]](str: C, startPos: Int, sliceLen: Int): Any = {
val len = str.length
// Hive and SQL use one-based indexing for SUBSTR arguments but also accept zero and
Expand Down Expand Up @@ -267,5 +270,8 @@ case class Substring(str: Expression, pos: Expression, len: Expression) extends
}
}

override def toString = s"SUBSTR($str, $pos, $len)"
override def toString = len match {
case max if max == Integer.MAX_VALUE => s"SUBSTR($str, $pos)"
case _ => s"SUBSTR($str, $pos, $len)"
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -860,7 +860,7 @@ private[hive] object HiveQl {
val BETWEEN = "(?i)BETWEEN".r
val WHEN = "(?i)WHEN".r
val CASE = "(?i)CASE".r
val SUBSTR = "(?i)I_SUBSTR(?:ING)?".r
val SUBSTR = "(?i)SUBSTR(?:ING)?".r

protected def nodeToExpr(node: Node): Expression = node match {
/* Attribute References */
Expand Down

0 comments on commit ec35c80

Please sign in to comment.