# [SPARK-24571][SQL] Support Char literals
## What changes were proposed in this pull request?

In the PR, I propose to automatically convert a `Literal` with `Char` type to a `Literal` of `String` type. Currently, the following code:
```scala
val df = Seq("Amsterdam", "San Francisco", "London").toDF("city")
df.where($"city".contains('o')).show(false)
```
fails with the exception:
```
Unsupported literal type class java.lang.Character o
java.lang.RuntimeException: Unsupported literal type class java.lang.Character o
at org.apache.spark.sql.catalyst.expressions.Literal$.apply(literals.scala:78)
```
The PR fixes this issue by converting a `char` to a `string` of length `1`. I believe it makes sense not to differentiate between `char` and `string(1)` in _a unified, multi-language data platform_ like Spark, which supports languages such as Python and R that do not distinguish single characters from strings.
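A minimal sketch of the behavior after this change, assuming `spark.implicits._` is in scope (e.g. in a Spark shell); the expected output is shown in comments:

```scala
import spark.implicits._

val df = Seq("Amsterdam", "San Francisco", "London").toDF("city")

// The Char literal 'o' is converted to the String literal "o",
// so the filter behaves exactly like $"city".contains("o").
df.where($"city".contains('o')).show(false)
// +-------------+
// |city         |
// +-------------+
// |San Francisco|
// |London       |
// +-------------+
```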

Author: Maxim Gekk <[email protected]>
Author: Maxim Gekk <[email protected]>

Closes #21578 from MaxGekk/support-char-literals.
MaxGekk authored and gatorsmile committed Jun 21, 2018
1 parent 9de11d3 commit 54fcaaf
Showing 5 changed files with 25 additions and 0 deletions.
```diff
@@ -286,6 +286,7 @@ object CatalystTypeConverters {
     override def toCatalystImpl(scalaValue: Any): UTF8String = scalaValue match {
       case str: String => UTF8String.fromString(str)
       case utf8: UTF8String => utf8
+      case chr: Char => UTF8String.fromString(chr.toString)
       case other => throw new IllegalArgumentException(
         s"The value (${other.toString}) of the type (${other.getClass.getCanonicalName}) "
         + s"cannot be converted to the string type")
```
```diff
@@ -57,6 +57,7 @@ object Literal {
     case b: Byte => Literal(b, ByteType)
     case s: Short => Literal(s, ShortType)
     case s: String => Literal(UTF8String.fromString(s), StringType)
+    case c: Char => Literal(UTF8String.fromString(c.toString), StringType)
     case b: Boolean => Literal(b, BooleanType)
     case d: BigDecimal => Literal(Decimal(d), DecimalType.fromBigDecimal(d))
     case d: JavaBigDecimal =>
```
```diff
@@ -22,6 +22,7 @@ import org.apache.spark.sql.Row
 import org.apache.spark.sql.catalyst.expressions.UnsafeArrayData
 import org.apache.spark.sql.catalyst.util.GenericArrayData
 import org.apache.spark.sql.types._
+import org.apache.spark.unsafe.types.UTF8String
 
 class CatalystTypeConvertersSuite extends SparkFunSuite {
 
@@ -139,4 +140,11 @@ class CatalystTypeConvertersSuite extends SparkFunSuite {
     assert(exception.getMessage.contains("The value (0.1) of the type "
       + "(java.lang.Double) cannot be converted to the string type"))
   }
+
+  test("SPARK-24571: convert Char to String") {
+    val chr: Char = 'X'
+    val converter = CatalystTypeConverters.createToCatalystConverter(StringType)
+    val expected = UTF8String.fromString("X")
+    assert(converter(chr) === expected)
+  }
 }
```
```diff
@@ -219,4 +219,11 @@ class LiteralExpressionSuite extends SparkFunSuite with ExpressionEvalHelper {
     checkUnsupportedTypeInLiteral(Map("key1" -> 1, "key2" -> 2))
     checkUnsupportedTypeInLiteral(("mike", 29, 1.0))
   }
+
+  test("SPARK-24571: char literals") {
+    checkEvaluation(Literal('X'), "X")
+    checkEvaluation(Literal.create('0'), "0")
+    checkEvaluation(Literal('\u0000'), "\u0000")
+    checkEvaluation(Literal.create('\n'), "\n")
+  }
 }
```
```diff
@@ -1479,6 +1479,14 @@ class DatasetSuite extends QueryTest with SharedSQLContext {
     assert(ds1.schema == ds2.schema)
     checkDataset(ds1.select("_2._2"), ds2.select("_2._2").collect(): _*)
   }
+
+  test("SPARK-24571: filtering of string values by char literal") {
+    val df = Seq("Amsterdam", "San Francisco", "X").toDF("city")
+    checkAnswer(df.where('city === 'X'), Seq(Row("X")))
+    checkAnswer(
+      df.where($"city".contains(new java.lang.Character('A'))),
+      Seq(Row("Amsterdam")))
+  }
 }
 
 case class TestDataUnion(x: Int, y: Int, z: Int)
```
