From bd40a33d06797b13ccbe60705e6e6df01ba3ccc4 Mon Sep 17 00:00:00 2001 From: Yin Huai Date: Mon, 28 Jul 2014 17:16:09 -0700 Subject: [PATCH] Address comments. --- python/pyspark/sql.py | 88 +++++++++++-------- .../sql/catalyst/analysis/Analyzer.scala | 4 +- .../catalyst/analysis/HiveTypeCoercion.scala | 8 +- .../catalyst/expressions/BoundAttribute.scala | 5 +- .../sql/catalyst/planning/QueryPlanner.scala | 3 +- .../sql/catalyst/planning/patterns.scala | 7 +- .../spark/sql/catalyst/rules/Rule.scala | 3 +- .../sql/catalyst/rules/RuleExecutor.scala | 13 ++- .../spark/sql/api/java/types/ArrayType.java | 9 +- .../spark/sql/api/java/types/BinaryType.java | 2 + .../spark/sql/api/java/types/BooleanType.java | 5 ++ .../spark/sql/api/java/types/ByteType.java | 4 +- .../spark/sql/api/java/types/DataType.java | 21 ++++- .../spark/sql/api/java/types/DecimalType.java | 2 + .../spark/sql/api/java/types/DoubleType.java | 4 +- .../spark/sql/api/java/types/FloatType.java | 4 +- .../spark/sql/api/java/types/IntegerType.java | 4 +- .../spark/sql/api/java/types/LongType.java | 4 +- .../spark/sql/api/java/types/MapType.java | 9 +- .../spark/sql/api/java/types/ShortType.java | 4 +- .../spark/sql/api/java/types/StringType.java | 2 + .../spark/sql/api/java/types/StructField.java | 4 + .../spark/sql/api/java/types/StructType.java | 5 ++ .../sql/api/java/types/TimestampType.java | 2 + .../sql/api/java/types/package-info.java | 2 +- .../org/apache/spark/sql/SQLContext.scala | 3 +- .../apache/spark/sql/execution/Exchange.scala | 2 +- .../scala/org/apache/spark/sql/package.scala | 9 +- 28 files changed, 154 insertions(+), 78 deletions(-) diff --git a/python/pyspark/sql.py b/python/pyspark/sql.py index c0d086acbe955..1e91fdc60b46f 100644 --- a/python/pyspark/sql.py +++ b/python/pyspark/sql.py @@ -41,18 +41,18 @@ class StringType(object): """ __metaclass__ = PrimitiveTypeSingleton - def _get_scala_type_string(self): + def __repr__(self): return "StringType" class BinaryType(object): """Spark SQL BinaryType - The data type representing bytes values and bytearray values. + The data type representing bytearray values. """ __metaclass__ = PrimitiveTypeSingleton - def _get_scala_type_string(self): + def __repr__(self): return "BinaryType" class BooleanType(object): @@ -63,14 +63,18 @@ class BooleanType(object): """ __metaclass__ = PrimitiveTypeSingleton - def _get_scala_type_string(self): + def __repr__(self): return "BooleanType" class TimestampType(object): - """Spark SQL TimestampType""" + """Spark SQL TimestampType + + The data type representing datetime.datetime values. + + """ __metaclass__ = PrimitiveTypeSingleton - def _get_scala_type_string(self): + def __repr__(self): return "TimestampType" class DecimalType(object): @@ -81,40 +85,48 @@ class DecimalType(object): """ __metaclass__ = PrimitiveTypeSingleton - def _get_scala_type_string(self): + def __repr__(self): return "DecimalType" class DoubleType(object): """Spark SQL DoubleType - The data type representing float values. Because a float value + The data type representing float values. """ __metaclass__ = PrimitiveTypeSingleton - def _get_scala_type_string(self): + def __repr__(self): return "DoubleType" class FloatType(object): """Spark SQL FloatType - For PySpark, please use L{DoubleType} instead of using L{FloatType}. + For now, please use L{DoubleType} instead of using L{FloatType}. + Because query evaluation is done in Scala, java.lang.Double will be be used + for Python float numbers. 
Because the underlying JVM type of FloatType is + java.lang.Float (in Java) and Float (in Scala), there will be a java.lang.ClassCastException + if FloatType (Python) is used. """ __metaclass__ = PrimitiveTypeSingleton - def _get_scala_type_string(self): + def __repr__(self): return "FloatType" class ByteType(object): """Spark SQL ByteType - For PySpark, please use L{IntegerType} instead of using L{ByteType}. + For now, please use L{IntegerType} instead of using L{ByteType}. + Because query evaluation is done in Scala, java.lang.Integer will be used + for Python int numbers. Because the underlying JVM type of ByteType is + java.lang.Byte (in Java) and Byte (in Scala), there will be a java.lang.ClassCastException + if ByteType (Python) is used. """ __metaclass__ = PrimitiveTypeSingleton - def _get_scala_type_string(self): + def __repr__(self): return "ByteType" class IntegerType(object): @@ -125,7 +137,7 @@ class IntegerType(object): """ __metaclass__ = PrimitiveTypeSingleton - def _get_scala_type_string(self): + def __repr__(self): return "IntegerType" class LongType(object): @@ -137,18 +149,22 @@ class LongType(object): """ __metaclass__ = PrimitiveTypeSingleton - def _get_scala_type_string(self): + def __repr__(self): return "LongType" class ShortType(object): """Spark SQL ShortType - For PySpark, please use L{IntegerType} instead of using L{ShortType}. + For now, please use L{IntegerType} instead of using L{ShortType}. + Because query evaluation is done in Scala, java.lang.Integer will be used + for Python int numbers. Because the underlying JVM type of ShortType is + java.lang.Short (in Java) and Short (in Scala), there will be a java.lang.ClassCastException + if ShortType (Python) is used. """ __metaclass__ = PrimitiveTypeSingleton - def _get_scala_type_string(self): + def __repr__(self): return "ShortType" class ArrayType(object): @@ -157,23 +173,23 @@ class ArrayType(object): The data type representing list values. """ - def __init__(self, elementType, containsNull): + def __init__(self, elementType, containsNull=False): """Creates an ArrayType :param elementType: the data type of elements. :param containsNull: indicates whether the list contains null values.
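For illustration, a sketch of how the new containsNull default combines with the numeric-type guidance above (it assumes the singleton primitive types defined earlier in this module; DoubleType/IntegerType stand in for FloatType/ByteType/ShortType):

        >>> ArrayType(DoubleType()) == ArrayType(DoubleType(), False)
        True
        >>> ArrayType(IntegerType()).containsNull
        False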
:return: - >>> ArrayType(StringType, True) == ArrayType(StringType, False) - False - >>> ArrayType(StringType, True) == ArrayType(StringType, True) + >>> ArrayType(StringType) == ArrayType(StringType, False) True + >>> ArrayType(StringType, True) == ArrayType(StringType) + False """ self.elementType = elementType self.containsNull = containsNull - def _get_scala_type_string(self): - return "ArrayType(" + self.elementType._get_scala_type_string() + "," + \ + def __repr__(self): + return "ArrayType(" + self.elementType.__repr__() + "," + \ str(self.containsNull).lower() + ")" def __eq__(self, other): @@ -207,9 +223,9 @@ def __init__(self, keyType, valueType, valueContainsNull=True): self.valueType = valueType self.valueContainsNull = valueContainsNull - def _get_scala_type_string(self): - return "MapType(" + self.keyType._get_scala_type_string() + "," + \ - self.valueType._get_scala_type_string() + "," + \ + def __repr__(self): + return "MapType(" + self.keyType.__repr__() + "," + \ + self.valueType.__repr__() + "," + \ str(self.valueContainsNull).lower() + ")" def __eq__(self, other): @@ -243,9 +259,9 @@ def __init__(self, name, dataType, nullable): self.dataType = dataType self.nullable = nullable - def _get_scala_type_string(self): + def __repr__(self): return "StructField(" + self.name + "," + \ - self.dataType._get_scala_type_string() + "," + \ + self.dataType.__repr__() + "," + \ str(self.nullable).lower() + ")" def __eq__(self, other): @@ -280,9 +296,9 @@ def __init__(self, fields): """ self.fields = fields - def _get_scala_type_string(self): + def __repr__(self): return "StructType(List(" + \ - ",".join([field._get_scala_type_string() for field in self.fields]) + "))" + ",".join([field.__repr__() for field in self.fields]) + "))" def __eq__(self, other): return (isinstance(other, self.__class__) and \ @@ -319,7 +335,7 @@ def _parse_datatype_string(datatype_string): :return: >>> def check_datatype(datatype): - ... scala_datatype = sqlCtx._ssql_ctx.parseDataType(datatype._get_scala_type_string()) + ... scala_datatype = sqlCtx._ssql_ctx.parseDataType(datatype.__repr__()) ... python_datatype = _parse_datatype_string(scala_datatype.toString()) ... return datatype == python_datatype >>> check_datatype(StringType()) @@ -536,7 +552,7 @@ def applySchema(self, rdd, schema): True """ jrdd = self._pythonToJavaMap(rdd._jrdd) - srdd = self._ssql_ctx.applySchema(jrdd.rdd(), schema._get_scala_type_string()) + srdd = self._ssql_ctx.applySchema(jrdd.rdd(), schema.__repr__()) return SchemaRDD(srdd, self) def registerRDDAsTable(self, rdd, tableName): @@ -569,7 +585,7 @@ def parquetFile(self, path): jschema_rdd = self._ssql_ctx.parquetFile(path) return SchemaRDD(jschema_rdd, self) - def jsonFile(self, path, schema = None): + def jsonFile(self, path, schema=None): """Loads a text file storing one JSON object per line as a L{SchemaRDD}. If the schema is provided, applies the given schema to this JSON dataset. @@ -618,11 +634,11 @@ def jsonFile(self, path, schema = None): if schema is None: jschema_rdd = self._ssql_ctx.jsonFile(path) else: - scala_datatype = self._ssql_ctx.parseDataType(schema._get_scala_type_string()) + scala_datatype = self._ssql_ctx.parseDataType(schema.__repr__()) jschema_rdd = self._ssql_ctx.jsonFile(path, scala_datatype) return SchemaRDD(jschema_rdd, self) - def jsonRDD(self, rdd, schema = None): + def jsonRDD(self, rdd, schema=None): """Loads an RDD storing one JSON object per string as a L{SchemaRDD}. If the schema is provided, applies the given schema to this JSON dataset. 
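A minimal sketch of the schema-aware call (it assumes a running `sc` and `sqlCtx`, as the other doctests in this module do; the JSON data and field names are made up for illustration):

        >>> strings = sc.parallelize(['{"name": "alice", "age": 2}'])
        >>> schema = StructType([StructField("name", StringType(), True),
        ...                      StructField("age", LongType(), True)])
        >>> srdd = sqlCtx.jsonRDD(strings, schema)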
@@ -672,7 +688,7 @@ def func(split, iterator): if schema is None: jschema_rdd = self._ssql_ctx.jsonRDD(jrdd.rdd()) else: - scala_datatype = self._ssql_ctx.parseDataType(schema._get_scala_type_string()) + scala_datatype = self._ssql_ctx.parseDataType(schema.__repr__()) jschema_rdd = self._ssql_ctx.jsonRDD(jrdd.rdd(), scala_datatype) return SchemaRDD(jschema_rdd, self) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala index 02bdb64f308a5..f847355a43537 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala @@ -109,12 +109,12 @@ class Analyzer(catalog: Catalog, registry: FunctionRegistry, caseSensitive: Bool object ResolveReferences extends Rule[LogicalPlan] { def apply(plan: LogicalPlan): LogicalPlan = plan transformUp { case q: LogicalPlan if q.childrenResolved => - logger.trace(s"Attempting to resolve ${q.simpleString}") + logTrace(s"Attempting to resolve ${q.simpleString}") q transformExpressions { case u @ UnresolvedAttribute(name) => // Leave unchanged if resolution fails. Hopefully will be resolved next round. val result = q.resolve(name).getOrElse(u) - logger.debug(s"Resolving $u to $result") + logDebug(s"Resolving $u to $result") result } } diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/HiveTypeCoercion.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/HiveTypeCoercion.scala index 47c7ad076ad07..e94f2a3bea63e 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/HiveTypeCoercion.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/HiveTypeCoercion.scala @@ -75,7 +75,7 @@ trait HiveTypeCoercion { // Leave the same if the dataTypes match. 
case Some(newType) if a.dataType == newType.dataType => a case Some(newType) => - logger.debug(s"Promoting $a to $newType in ${q.simpleString}}") + logDebug(s"Promoting $a to $newType in ${q.simpleString}}") newType } } @@ -154,7 +154,7 @@ trait HiveTypeCoercion { (Alias(Cast(l, StringType), l.name)(), r) case (l, r) if l.dataType != r.dataType => - logger.debug(s"Resolving mismatched union input ${l.dataType}, ${r.dataType}") + logDebug(s"Resolving mismatched union input ${l.dataType}, ${r.dataType}") findTightestCommonType(l.dataType, r.dataType).map { widestType => val newLeft = if (l.dataType == widestType) l else Alias(Cast(l, widestType), l.name)() @@ -170,7 +170,7 @@ trait HiveTypeCoercion { val newLeft = if (castedLeft.map(_.dataType) != left.output.map(_.dataType)) { - logger.debug(s"Widening numeric types in union $castedLeft ${left.output}") + logDebug(s"Widening numeric types in union $castedLeft ${left.output}") Project(castedLeft, left) } else { left @@ -178,7 +178,7 @@ trait HiveTypeCoercion { val newRight = if (castedRight.map(_.dataType) != right.output.map(_.dataType)) { - logger.debug(s"Widening numeric types in union $castedRight ${right.output}") + logDebug(s"Widening numeric types in union $castedRight ${right.output}") Project(castedRight, right) } else { right diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/BoundAttribute.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/BoundAttribute.scala index cbc214d442064..92a30810c736d 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/BoundAttribute.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/BoundAttribute.scala @@ -17,8 +17,7 @@ package org.apache.spark.sql.catalyst.expressions -import com.typesafe.scalalogging.slf4j.Logging - +import org.apache.spark.Logging import org.apache.spark.sql.catalyst.trees import org.apache.spark.sql.catalyst.errors.attachTree import org.apache.spark.sql.catalyst.plans.QueryPlan @@ -80,7 +79,7 @@ object BindReferences extends Logging { // produce new attributes that can't be bound. Likely the right thing to do is remove // this rule and require all operators to explicitly bind to the input schema that // they specify. 
- logger.debug(s"Couldn't find $a in ${input.mkString("[", ",", "]")}") + logDebug(s"Couldn't find $a in ${input.mkString("[", ",", "]")}") a } else { BoundReference(ordinal, a) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/planning/QueryPlanner.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/planning/QueryPlanner.scala index 4ff5791635f4c..5839c9f7c43ef 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/planning/QueryPlanner.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/planning/QueryPlanner.scala @@ -17,8 +17,7 @@ package org.apache.spark.sql.catalyst.planning -import com.typesafe.scalalogging.slf4j.Logging - +import org.apache.spark.Logging import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan import org.apache.spark.sql.catalyst.trees.TreeNode diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/planning/patterns.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/planning/patterns.scala index b8ae326be6fab..820eaeb75f768 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/planning/patterns.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/planning/patterns.scala @@ -19,8 +19,7 @@ package org.apache.spark.sql.catalyst.planning import scala.annotation.tailrec -import com.typesafe.scalalogging.slf4j.Logging - +import org.apache.spark.Logging import org.apache.spark.sql.catalyst.expressions._ import org.apache.spark.sql.catalyst.plans._ import org.apache.spark.sql.catalyst.plans.logical._ @@ -114,7 +113,7 @@ object ExtractEquiJoinKeys extends Logging with PredicateHelper { def unapply(plan: LogicalPlan): Option[ReturnType] = plan match { case join @ Join(left, right, joinType, condition) => - logger.debug(s"Considering join on: $condition") + logDebug(s"Considering join on: $condition") // Find equi-join predicates that can be evaluated before the join, and thus can be used // as join keys. 
val (joinPredicates, otherPredicates) = @@ -132,7 +131,7 @@ object ExtractEquiJoinKeys extends Logging with PredicateHelper { val rightKeys = joinKeys.map(_._2) if (joinKeys.nonEmpty) { - logger.debug(s"leftKeys:${leftKeys} | rightKeys:${rightKeys}") + logDebug(s"leftKeys:${leftKeys} | rightKeys:${rightKeys}") Some((joinType, leftKeys, rightKeys, otherPredicates.reduceOption(And), left, right)) } else { None diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/rules/Rule.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/rules/Rule.scala index f39bff8c25164..03414b2301e81 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/rules/Rule.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/rules/Rule.scala @@ -17,8 +17,7 @@ package org.apache.spark.sql.catalyst.rules -import com.typesafe.scalalogging.slf4j.Logging - +import org.apache.spark.Logging import org.apache.spark.sql.catalyst.trees.TreeNode abstract class Rule[TreeType <: TreeNode[_]] extends Logging { diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/rules/RuleExecutor.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/rules/RuleExecutor.scala index e70ce66cb745f..e73515ff29377 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/rules/RuleExecutor.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/rules/RuleExecutor.scala @@ -17,8 +17,7 @@ package org.apache.spark.sql.catalyst.rules -import com.typesafe.scalalogging.slf4j.Logging - +import org.apache.spark.Logging import org.apache.spark.sql.catalyst.trees.TreeNode import org.apache.spark.sql.catalyst.util.sideBySide @@ -61,7 +60,7 @@ abstract class RuleExecutor[TreeType <: TreeNode[_]] extends Logging { case (plan, rule) => val result = rule(plan) if (!result.fastEquals(plan)) { - logger.trace( + logTrace( s""" |=== Applying Rule ${rule.ruleName} === |${sideBySide(plan.treeString, result.treeString).mkString("\n")} @@ -72,25 +71,25 @@ abstract class RuleExecutor[TreeType <: TreeNode[_]] extends Logging { } iteration += 1 if (iteration > batch.strategy.maxIterations) { - logger.info(s"Max iterations ($iteration) reached for batch ${batch.name}") + logInfo(s"Max iterations ($iteration) reached for batch ${batch.name}") continue = false } if (curPlan.fastEquals(lastPlan)) { - logger.trace(s"Fixed point reached for batch ${batch.name} after $iteration iterations.") + logTrace(s"Fixed point reached for batch ${batch.name} after $iteration iterations.") continue = false } lastPlan = curPlan } if (!batchStartPlan.fastEquals(curPlan)) { - logger.debug( + logDebug( s""" |=== Result of Batch ${batch.name} === |${sideBySide(plan.treeString, curPlan.treeString).mkString("\n")} """.stripMargin) } else { - logger.trace(s"Batch ${batch.name} has no effect.") + logTrace(s"Batch ${batch.name} has no effect.") } } diff --git a/sql/core/src/main/java/org/apache/spark/sql/api/java/types/ArrayType.java b/sql/core/src/main/java/org/apache/spark/sql/api/java/types/ArrayType.java index 61f52055842e6..17334ca31b2b7 100644 --- a/sql/core/src/main/java/org/apache/spark/sql/api/java/types/ArrayType.java +++ b/sql/core/src/main/java/org/apache/spark/sql/api/java/types/ArrayType.java @@ -21,8 +21,13 @@ * The data type representing Lists. * An ArrayType object comprises two fields, {@code DataType elementType} and * {@code boolean containsNull}. The field of {@code elementType} is used to specify the type of - * array elements. 
The field of {@code containsNull} is used to specify if the array can have - * any {@code null} value. + * array elements. The field of {@code containsNull} is used to specify if the array has + * {@code null} values. + * + * To create an {@link ArrayType}, + * {@link org.apache.spark.sql.api.java.types.DataType#createArrayType(DataType)} or + * {@link org.apache.spark.sql.api.java.types.DataType#createArrayType(DataType, boolean)} + * should be used. */ public class ArrayType extends DataType { private DataType elementType; diff --git a/sql/core/src/main/java/org/apache/spark/sql/api/java/types/BinaryType.java b/sql/core/src/main/java/org/apache/spark/sql/api/java/types/BinaryType.java index c33ee5e25cd32..61703179850e9 100644 --- a/sql/core/src/main/java/org/apache/spark/sql/api/java/types/BinaryType.java +++ b/sql/core/src/main/java/org/apache/spark/sql/api/java/types/BinaryType.java @@ -19,6 +19,8 @@ /** * The data type representing byte[] values. + * + * {@code BinaryType} is represented by the singleton object {@link DataType#BinaryType}. */ public class BinaryType extends DataType { protected BinaryType() {} diff --git a/sql/core/src/main/java/org/apache/spark/sql/api/java/types/BooleanType.java b/sql/core/src/main/java/org/apache/spark/sql/api/java/types/BooleanType.java index 38981a21da58d..8fa24d85d1238 100644 --- a/sql/core/src/main/java/org/apache/spark/sql/api/java/types/BooleanType.java +++ b/sql/core/src/main/java/org/apache/spark/sql/api/java/types/BooleanType.java @@ -17,6 +17,11 @@ package org.apache.spark.sql.api.java.types; +/** + * The data type representing boolean and Boolean values. + * + * {@code BooleanType} is represented by the singleton object {@link DataType#BooleanType}. + */ public class BooleanType extends DataType { protected BooleanType() {} } diff --git a/sql/core/src/main/java/org/apache/spark/sql/api/java/types/ByteType.java b/sql/core/src/main/java/org/apache/spark/sql/api/java/types/ByteType.java index 16b0d9ecf688c..2de32978e2705 100644 --- a/sql/core/src/main/java/org/apache/spark/sql/api/java/types/ByteType.java +++ b/sql/core/src/main/java/org/apache/spark/sql/api/java/types/ByteType.java @@ -18,7 +18,9 @@ package org.apache.spark.sql.api.java.types; /** - * The data type representing Byte values. + * The data type representing byte and Byte values. + * + * {@code ByteType} is represented by the singleton object {@link DataType#ByteType}. */ public class ByteType extends DataType { protected ByteType() {} diff --git a/sql/core/src/main/java/org/apache/spark/sql/api/java/types/DataType.java b/sql/core/src/main/java/org/apache/spark/sql/api/java/types/DataType.java index c67287dea8ba6..6fd04aa2c6c9c 100644 --- a/sql/core/src/main/java/org/apache/spark/sql/api/java/types/DataType.java +++ b/sql/core/src/main/java/org/apache/spark/sql/api/java/types/DataType.java @@ -23,6 +23,9 @@ /** * The base type of all Spark SQL data types. + * + * To get/create specific data type, users should use singleton objects and factory methods + * provided by this class. */ public abstract class DataType { @@ -81,6 +84,21 @@ public abstract class DataType { */ public static final ShortType ShortType = new ShortType(); + /** + * Creates an ArrayType by specifying the data type of elements ({@code elementType}). + * The field of {@code containsNull} is set to {@code false}. 
+ * + * @param elementType + * @return + */ + public static ArrayType createArrayType(DataType elementType) { + if (elementType == null) { + throw new IllegalArgumentException("elementType should not be null."); + } + + return new ArrayType(elementType, false); + } + /** * Creates an ArrayType by specifying the data type of elements ({@code elementType}) and * whether the array contains null values ({@code containsNull}). @@ -98,7 +116,8 @@ public static ArrayType createArrayType(DataType elementType, boolean containsNu /** * Creates a MapType by specifying the data type of keys ({@code keyType}) and values - * ({@code keyType}). + * ({@code keyType}). The field of {@code valueContainsNull} is set to {@code true}. + * * @param keyType * @param valueType * @return diff --git a/sql/core/src/main/java/org/apache/spark/sql/api/java/types/DecimalType.java b/sql/core/src/main/java/org/apache/spark/sql/api/java/types/DecimalType.java index d483824999e85..9250491a2d2ca 100644 --- a/sql/core/src/main/java/org/apache/spark/sql/api/java/types/DecimalType.java +++ b/sql/core/src/main/java/org/apache/spark/sql/api/java/types/DecimalType.java @@ -19,6 +19,8 @@ /** * The data type representing java.math.BigDecimal values. + * + * {@code DecimalType} is represented by the singleton object {@link DataType#DecimalType}. */ public class DecimalType extends DataType { protected DecimalType() {} diff --git a/sql/core/src/main/java/org/apache/spark/sql/api/java/types/DoubleType.java b/sql/core/src/main/java/org/apache/spark/sql/api/java/types/DoubleType.java index 13a7bf6bbb5ed..3e86917fddc4b 100644 --- a/sql/core/src/main/java/org/apache/spark/sql/api/java/types/DoubleType.java +++ b/sql/core/src/main/java/org/apache/spark/sql/api/java/types/DoubleType.java @@ -18,7 +18,9 @@ package org.apache.spark.sql.api.java.types; /** - * The data type representing Double values. + * The data type representing double and Double values. + * + * {@code DoubleType} is represented by the singleton object {@link DataType#DoubleType}. */ public class DoubleType extends DataType { protected DoubleType() {} diff --git a/sql/core/src/main/java/org/apache/spark/sql/api/java/types/FloatType.java b/sql/core/src/main/java/org/apache/spark/sql/api/java/types/FloatType.java index bf47d4fc1fa07..fa860d40176ef 100644 --- a/sql/core/src/main/java/org/apache/spark/sql/api/java/types/FloatType.java +++ b/sql/core/src/main/java/org/apache/spark/sql/api/java/types/FloatType.java @@ -18,7 +18,9 @@ package org.apache.spark.sql.api.java.types; /** - * The data type representing Float values. + * The data type representing float and Float values. + * + * {@code FloatType} is represented by the singleton object {@link DataType#FloatType}. */ public class FloatType extends DataType { protected FloatType() {} diff --git a/sql/core/src/main/java/org/apache/spark/sql/api/java/types/IntegerType.java b/sql/core/src/main/java/org/apache/spark/sql/api/java/types/IntegerType.java index f41ec2260df6b..bd973eca2c3ce 100644 --- a/sql/core/src/main/java/org/apache/spark/sql/api/java/types/IntegerType.java +++ b/sql/core/src/main/java/org/apache/spark/sql/api/java/types/IntegerType.java @@ -18,7 +18,9 @@ package org.apache.spark.sql.api.java.types; /** - * The data type representing Int values. + * The data type representing int and Integer values. + * + * {@code IntegerType} is represented by the singleton object {@link DataType#IntegerType}. 
*/ public class IntegerType extends DataType { protected IntegerType() {} diff --git a/sql/core/src/main/java/org/apache/spark/sql/api/java/types/LongType.java b/sql/core/src/main/java/org/apache/spark/sql/api/java/types/LongType.java index 7c73a7b506a2b..e00233304cefa 100644 --- a/sql/core/src/main/java/org/apache/spark/sql/api/java/types/LongType.java +++ b/sql/core/src/main/java/org/apache/spark/sql/api/java/types/LongType.java @@ -18,7 +18,9 @@ package org.apache.spark.sql.api.java.types; /** - * The data type representing Long values. + * The data type representing long and Long values. + * + * {@code LongType} is represented by the singleton object {@link DataType#LongType}. */ public class LongType extends DataType { protected LongType() {} diff --git a/sql/core/src/main/java/org/apache/spark/sql/api/java/types/MapType.java b/sql/core/src/main/java/org/apache/spark/sql/api/java/types/MapType.java index d116241a0e407..d2270d4b6ff9c 100644 --- a/sql/core/src/main/java/org/apache/spark/sql/api/java/types/MapType.java +++ b/sql/core/src/main/java/org/apache/spark/sql/api/java/types/MapType.java @@ -19,9 +19,16 @@ /** * The data type representing Maps. A MapType object comprises two fields, - * {@code DataType keyType} and {@code DataType valueType}. + * {@code DataType keyType}, {@code DataType valueType}, and {@code boolean valueContainsNull}. * The field of {@code keyType} is used to specify the type of keys in the map. * The field of {@code valueType} is used to specify the type of values in the map. + * The field of {@code valueContainsNull} is used to specify if map values have + * {@code null} values. + * + * To create a {@link MapType}, + * {@link org.apache.spark.sql.api.java.types.DataType#createMapType(DataType, DataType)} or + * {@link org.apache.spark.sql.api.java.types.DataType#createMapType(DataType, DataType, boolean)} + * should be used. */ public class MapType extends DataType { private DataType keyType; diff --git a/sql/core/src/main/java/org/apache/spark/sql/api/java/types/ShortType.java b/sql/core/src/main/java/org/apache/spark/sql/api/java/types/ShortType.java index 8ffa75a835e63..98f9507acf121 100644 --- a/sql/core/src/main/java/org/apache/spark/sql/api/java/types/ShortType.java +++ b/sql/core/src/main/java/org/apache/spark/sql/api/java/types/ShortType.java @@ -18,7 +18,9 @@ package org.apache.spark.sql.api.java.types; /** - * The data type representing Short values. + * The data type representing short and Short values. + * + * {@code ShortType} is represented by the singleton object {@link DataType#ShortType}. */ public class ShortType extends DataType { protected ShortType() {} diff --git a/sql/core/src/main/java/org/apache/spark/sql/api/java/types/StringType.java b/sql/core/src/main/java/org/apache/spark/sql/api/java/types/StringType.java index dd9be52f8c53b..b8e7dbe646071 100644 --- a/sql/core/src/main/java/org/apache/spark/sql/api/java/types/StringType.java +++ b/sql/core/src/main/java/org/apache/spark/sql/api/java/types/StringType.java @@ -19,6 +19,8 @@ /** * The data type representing String values. + * + * {@code StringType} is represented by the singleton object {@link DataType#StringType}. 
*/ public class StringType extends DataType { protected StringType() {} diff --git a/sql/core/src/main/java/org/apache/spark/sql/api/java/types/StructField.java b/sql/core/src/main/java/org/apache/spark/sql/api/java/types/StructField.java index 25c82de9641c5..54e9c11ea415e 100644 --- a/sql/core/src/main/java/org/apache/spark/sql/api/java/types/StructField.java +++ b/sql/core/src/main/java/org/apache/spark/sql/api/java/types/StructField.java @@ -24,6 +24,10 @@ * The field of {@code dataType} specifies the data type of a StructField. * The field of {@code nullable} specifies if values of a StructField can contain {@code null} * values. + * + * To create a {@link StructField}, + * {@link org.apache.spark.sql.api.java.types.DataType#createStructField(String, DataType, boolean)} + * should be used. */ public class StructField { private String name; diff --git a/sql/core/src/main/java/org/apache/spark/sql/api/java/types/StructType.java b/sql/core/src/main/java/org/apache/spark/sql/api/java/types/StructType.java index 17142ff672822..33a42f4b16265 100644 --- a/sql/core/src/main/java/org/apache/spark/sql/api/java/types/StructType.java +++ b/sql/core/src/main/java/org/apache/spark/sql/api/java/types/StructType.java @@ -23,6 +23,11 @@ /** * The data type representing Rows. * A StructType object comprises an array of StructFields. + * + * To create an {@link StructType}, + * {@link org.apache.spark.sql.api.java.types.DataType#createStructType(java.util.List)} or + * {@link org.apache.spark.sql.api.java.types.DataType#createStructType(StructField[])} + * should be used. */ public class StructType extends DataType { private StructField[] fields; diff --git a/sql/core/src/main/java/org/apache/spark/sql/api/java/types/TimestampType.java b/sql/core/src/main/java/org/apache/spark/sql/api/java/types/TimestampType.java index 8c2f203d950c4..65295779f71ec 100644 --- a/sql/core/src/main/java/org/apache/spark/sql/api/java/types/TimestampType.java +++ b/sql/core/src/main/java/org/apache/spark/sql/api/java/types/TimestampType.java @@ -19,6 +19,8 @@ /** * The data type representing java.sql.Timestamp values. + * + * {@code TimestampType} is represented by the singleton object {@link DataType#TimestampType}. */ public class TimestampType extends DataType { protected TimestampType() {} diff --git a/sql/core/src/main/java/org/apache/spark/sql/api/java/types/package-info.java b/sql/core/src/main/java/org/apache/spark/sql/api/java/types/package-info.java index a1c6fcf1430f5..f169ac65e226f 100644 --- a/sql/core/src/main/java/org/apache/spark/sql/api/java/types/package-info.java +++ b/sql/core/src/main/java/org/apache/spark/sql/api/java/types/package-info.java @@ -19,4 +19,4 @@ /** * Allows users to get and create Spark SQL data types. */ -package org.apache.spark.sql.api.java.types; \ No newline at end of file +package org.apache.spark.sql.api.java.types; diff --git a/sql/core/src/main/scala/org/apache/spark/sql/SQLContext.scala b/sql/core/src/main/scala/org/apache/spark/sql/SQLContext.scala index cb48b689903c7..f93839c66c5d8 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/SQLContext.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/SQLContext.scala @@ -107,7 +107,8 @@ class SQLContext(@transient val sparkContext: SparkContext) /** * Parses the data type in our internal string representation. The data type string should - * have the same format as the one generate by `toString` in scala. + * have the same format as the one generated by `toString` in scala. + * It is only used by PySpark. 
*/ private[sql] def parseDataType(dataTypeString: String): DataType = { val parser = org.apache.spark.sql.catalyst.types.DataType diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/Exchange.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/Exchange.scala index 00010ef6e798a..67d70d599c3f7 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/Exchange.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/Exchange.scala @@ -99,7 +99,7 @@ private[sql] case class AddExchange(sqlContext: SQLContext) extends Rule[SparkPl !operator.requiredChildDistribution.zip(operator.children).map { case (required, child) => val valid = child.outputPartitioning.satisfies(required) - logger.debug( + logDebug( s"${if (valid) "Valid" else "Invalid"} distribution," + s"required: $required current: ${child.outputPartitioning}") valid diff --git a/sql/core/src/main/scala/org/apache/spark/sql/package.scala b/sql/core/src/main/scala/org/apache/spark/sql/package.scala index 819e36bbf9c02..e7732fd7d8336 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/package.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/package.scala @@ -17,10 +17,7 @@ package org.apache.spark -import scala.collection.JavaConverters._ - import org.apache.spark.annotation.DeveloperApi -import org.apache.spark.sql.api.java.types.{DataType => JDataType, StructField => JStructField} /** * Allows the execution of relational queries, including those expressed in SQL using Spark. @@ -243,10 +240,12 @@ package object sql { /** * :: DeveloperApi :: * - * The data type representing `Seq`s. + * The data type for collections of multiple values. + * Internally these are represented as columns that contain a ``scala.collection.Seq``. + * * An [[ArrayType]] object comprises two fields, `elementType: [[DataType]]` and * `containsNull: Boolean`. The field of `elementType` is used to specify the type of - * array elements. The field of `containsNull` is used to specify if the array has `null` valus. + * array elements. The field of `containsNull` is used to specify if the array has `null` values. * * @group dataType */
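Taken together, the Python side of this API can be exercised roughly as follows (an illustrative sketch, assuming a running SparkContext `sc`, a SQLContext `sqlCtx`, and an RDD of Python dicts whose keys match the field names; all names and data are made up):

    from pyspark.sql import (StructType, StructField, StringType, IntegerType,
                             ArrayType, MapType)

    # Build a schema with the public type constructors. Per the guidance above,
    # prefer IntegerType/DoubleType over ByteType/ShortType/FloatType in Python.
    schema = StructType([
        StructField("name", StringType(), False),
        StructField("age", IntegerType(), True)])

    # Complex types compose; containsNull defaults to False for ArrayType and
    # valueContainsNull defaults to True for MapType.
    nested = ArrayType(MapType(StringType(), IntegerType()))

    people = sc.parallelize([{"name": "alice", "age": 2}])
    srdd = sqlCtx.applySchema(people, schema)   # schema.__repr__() is parsed on the JVM side
    sqlCtx.registerRDDAsTable(srdd, "people")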