Skip to content

Commit

Permalink
[SPARK-5758][SQL] Use LongType as the default type for integers in JSON schema inference.
Browse files Browse the repository at this point in the history

Author: Yin Huai <[email protected]>

Closes #4544 from yhuai/jsonUseLongTypeByDefault and squashes the following commits:

6e2ffc2 [Yin Huai] Use LongType as the default type for integers in JSON schema inference.

(cherry picked from commit c352ffb)
Signed-off-by: Michael Armbrust <[email protected]>
  • Loading branch information
yhuai authored and marmbrus committed Feb 12, 2015
1 parent bf0d15c commit b0c79da
Show file tree
Hide file tree
Showing 3 changed files with 17 additions and 13 deletions.
12 changes: 8 additions & 4 deletions sql/core/src/main/scala/org/apache/spark/sql/json/JsonRDD.scala
Original file line number Diff line number Diff line change
Expand Up @@ -17,13 +17,12 @@

package org.apache.spark.sql.json

import java.io.StringWriter
import java.sql.{Date, Timestamp}
import java.sql.Timestamp

import scala.collection.Map
import scala.collection.convert.Wrappers.{JMapWrapper, JListWrapper}

import com.fasterxml.jackson.core.{JsonGenerator, JsonProcessingException, JsonFactory}
import com.fasterxml.jackson.core.{JsonGenerator, JsonProcessingException}
import com.fasterxml.jackson.databind.ObjectMapper

import org.apache.spark.rdd.RDD
Expand Down Expand Up @@ -178,7 +177,12 @@ private[sql] object JsonRDD extends Logging {
}

private def typeOfPrimitiveValue: PartialFunction[Any, DataType] = {
ScalaReflection.typeOfObject orElse {
// For Integer values, use LongType by default.
val useLongType: PartialFunction[Any, DataType] = {
case value: IntegerType.JvmType => LongType
}

useLongType orElse ScalaReflection.typeOfObject orElse {
// Since we do not have a data type backed by BigInteger,
// when we see a Java BigInteger, we use DecimalType.
case value: java.math.BigInteger => DecimalType.Unlimited
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -164,7 +164,7 @@ public void applySchemaToJSON() {
fields.add(DataTypes.createStructField("bigInteger", DataTypes.createDecimalType(), true));
fields.add(DataTypes.createStructField("boolean", DataTypes.BooleanType, true));
fields.add(DataTypes.createStructField("double", DataTypes.DoubleType, true));
fields.add(DataTypes.createStructField("integer", DataTypes.IntegerType, true));
fields.add(DataTypes.createStructField("integer", DataTypes.LongType, true));
fields.add(DataTypes.createStructField("long", DataTypes.LongType, true));
fields.add(DataTypes.createStructField("null", DataTypes.StringType, true));
fields.add(DataTypes.createStructField("string", DataTypes.StringType, true));
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -222,7 +222,7 @@ class JsonSuite extends QueryTest {
StructField("bigInteger", DecimalType.Unlimited, true) ::
StructField("boolean", BooleanType, true) ::
StructField("double", DoubleType, true) ::
StructField("integer", IntegerType, true) ::
StructField("integer", LongType, true) ::
StructField("long", LongType, true) ::
StructField("null", StringType, true) ::
StructField("string", StringType, true) :: Nil)
Expand Down Expand Up @@ -252,7 +252,7 @@ class JsonSuite extends QueryTest {
StructField("arrayOfBigInteger", ArrayType(DecimalType.Unlimited, false), true) ::
StructField("arrayOfBoolean", ArrayType(BooleanType, false), true) ::
StructField("arrayOfDouble", ArrayType(DoubleType, false), true) ::
StructField("arrayOfInteger", ArrayType(IntegerType, false), true) ::
StructField("arrayOfInteger", ArrayType(LongType, false), true) ::
StructField("arrayOfLong", ArrayType(LongType, false), true) ::
StructField("arrayOfNull", ArrayType(StringType, true), true) ::
StructField("arrayOfString", ArrayType(StringType, false), true) ::
Expand All @@ -265,7 +265,7 @@ class JsonSuite extends QueryTest {
StructField("field1", BooleanType, true) ::
StructField("field2", DecimalType.Unlimited, true) :: Nil), true) ::
StructField("structWithArrayFields", StructType(
StructField("field1", ArrayType(IntegerType, false), true) ::
StructField("field1", ArrayType(LongType, false), true) ::
StructField("field2", ArrayType(StringType, false), true) :: Nil), true) :: Nil)

assert(expectedSchema === jsonDF.schema)
Expand Down Expand Up @@ -486,7 +486,7 @@ class JsonSuite extends QueryTest {
val jsonDF = jsonRDD(complexFieldValueTypeConflict)

val expectedSchema = StructType(
StructField("array", ArrayType(IntegerType, false), true) ::
StructField("array", ArrayType(LongType, false), true) ::
StructField("num_struct", StringType, true) ::
StructField("str_array", StringType, true) ::
StructField("struct", StructType(
Expand Down Expand Up @@ -540,7 +540,7 @@ class JsonSuite extends QueryTest {
val expectedSchema = StructType(
StructField("a", BooleanType, true) ::
StructField("b", LongType, true) ::
StructField("c", ArrayType(IntegerType, false), true) ::
StructField("c", ArrayType(LongType, false), true) ::
StructField("d", StructType(
StructField("field", BooleanType, true) :: Nil), true) ::
StructField("e", StringType, true) :: Nil)
Expand All @@ -560,7 +560,7 @@ class JsonSuite extends QueryTest {
StructField("bigInteger", DecimalType.Unlimited, true) ::
StructField("boolean", BooleanType, true) ::
StructField("double", DoubleType, true) ::
StructField("integer", IntegerType, true) ::
StructField("integer", LongType, true) ::
StructField("long", LongType, true) ::
StructField("null", StringType, true) ::
StructField("string", StringType, true) :: Nil)
Expand Down Expand Up @@ -781,12 +781,12 @@ class JsonSuite extends QueryTest {
ArrayType(ArrayType(ArrayType(ArrayType(StringType, false), false), true), false), true) ::
StructField("field2",
ArrayType(ArrayType(
StructType(StructField("Test", IntegerType, true) :: Nil), false), true), true) ::
StructType(StructField("Test", LongType, true) :: Nil), false), true), true) ::
StructField("field3",
ArrayType(ArrayType(
StructType(StructField("Test", StringType, true) :: Nil), true), false), true) ::
StructField("field4",
ArrayType(ArrayType(ArrayType(IntegerType, false), true), false), true) :: Nil)
ArrayType(ArrayType(ArrayType(LongType, false), true), false), true) :: Nil)

assert(schema === jsonDF.schema)

Expand Down

0 comments on commit b0c79da

Please sign in to comment.