From d65072e483da9fd2dbd4999a4976befe2ce054d1 Mon Sep 17 00:00:00 2001
From: Xiangrui Meng
Date: Tue, 7 Oct 2014 14:46:48 -0700
Subject: [PATCH] remove Map.empty

---
 .../spark/sql/catalyst/ScalaReflection.scala  |  4 ++--
 .../sql/catalyst/expressions/Expression.scala |  4 +++-
 .../expressions/namedExpressions.scala        |  7 +++----
 .../plans/logical/basicOperators.scala        |  9 +++++----
 .../spark/sql/catalyst/types/dataTypes.scala  |  7 ++++---
 .../org/apache/spark/sql/MetadataSuite.scala  | 19 +++++++++++++------
 6 files changed, 30 insertions(+), 20 deletions(-)

diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/ScalaReflection.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/ScalaReflection.scala
index 51ba0c3dac321..18e10c95754ff 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/ScalaReflection.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/ScalaReflection.scala
@@ -43,7 +43,7 @@ object ScalaReflection {
   /** Returns a Sequence of attributes for the given case class type. */
   def attributesFor[T: TypeTag]: Seq[Attribute] = schemaFor[T] match {
     case Schema(s: StructType, _) =>
-      s.fields.map(f => AttributeReference(f.name, f.dataType, f.nullable)())
+      s.fields.map(f => AttributeReference(f.name, f.dataType, f.nullable, f.metadata)())
   }
 
   /** Returns a catalyst DataType and its nullability for the given Scala Type using reflection. */
@@ -62,7 +62,7 @@ object ScalaReflection {
         params.head.map { p =>
           val Schema(dataType, nullable) =
             schemaFor(p.typeSignature.substituteTypes(formalTypeArgs, actualTypeArgs))
-          StructField(p.name.toString, dataType, nullable, Map.empty)
+          StructField(p.name.toString, dataType, nullable)
         }), nullable = true)
       // Need to decide if we actually need a special type here.
       case t if t <:< typeOf[Array[Byte]] => Schema(BinaryType, nullable = true)
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Expression.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Expression.scala
index ab0179b14b592..6371582ddc6c9 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Expression.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Expression.scala
@@ -41,9 +41,11 @@ abstract class Expression extends TreeNode[Expression] {
    */
   def foldable: Boolean = false
   def nullable: Boolean
-  def metadata: Map[String, Any] = Map.empty
   def references: AttributeSet = AttributeSet(children.flatMap(_.references.iterator))
 
+  /** Returns the metadata when this expression is a reference to another expression with metadata. */
+  def metadata: Map[String, Any] = Map.empty
+
   /** Returns the result of evaluating this expression on a given input Row */
   def eval(input: Row = null): EvaluatedType
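The hunk above is the root of the metadata chain: every `Expression` now answers an empty `Map` unless it deliberately forwards another expression's output. The following standalone sketch illustrates the intended propagation pattern; `Expr`, `Ref`, and `Renamed` are simplified, hypothetical stand-ins for the Catalyst classes, not the real API:

    // Simplified model of the hook introduced above: the base type defaults
    // to an empty Map, and only nodes that merely forward a child's output
    // override metadata to pass it through unchanged.
    trait Expr {
      def metadata: Map[String, Any] = Map.empty
    }

    // Hypothetical stand-in for a resolved column reference.
    case class Ref(name: String, meta: Map[String, Any]) extends Expr {
      override def metadata: Map[String, Any] = meta
    }

    // Hypothetical stand-in for an alias: renaming leaves the column's
    // content untouched, so the child's metadata survives.
    case class Renamed(child: Expr, name: String) extends Expr {
      override def metadata: Map[String, Any] = child.metadata
    }

    object PropagationSketch extends App {
      val age = Ref("age", Map("doc" -> "age (must be nonnegative)"))
      assert(Renamed(age, "years").metadata("doc") == "age (must be nonnegative)")
    }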
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/namedExpressions.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/namedExpressions.scala
index 5564fa2b09bb6..3167e4ba49a87 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/namedExpressions.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/namedExpressions.scala
@@ -85,11 +85,11 @@ case class Alias(child: Expression, name: String)
   override def dataType = child.dataType
   override def nullable = child.nullable
-
+  override def metadata: Map[String, Any] = child.metadata
   override def toAttribute = {
     if (resolved) {
-      AttributeReference(name, child.dataType, child.nullable)(exprId, qualifiers)
+      AttributeReference(name, child.dataType, child.nullable, child.metadata)(exprId, qualifiers)
     } else {
       UnresolvedAttribute(name)
     }
@@ -98,8 +98,6 @@ case class Alias(child: Expression, name: String)
   override def toString: String = s"$child AS $name#${exprId.id}$typeSuffix"
 
   override protected final def otherCopyArgs = exprId :: qualifiers :: Nil
-
-  override def metadata: Map[String, Any] = child.metadata
 }
 
 /**
@@ -108,6 +106,7 @@ case class Alias(child: Expression, name: String)
  * @param name The name of this attribute, should only be used during analysis or for debugging.
  * @param dataType The [[DataType]] of this attribute.
  * @param nullable True if null is a valid value for this attribute.
+ * @param metadata The metadata of this attribute.
  * @param exprId A globally unique id used to check if different AttributeReferences refer to the
  *               same attribute.
 * @param qualifiers a list of strings that can be used to refer to this attribute in a fully
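With `Alias` overriding `metadata` and threading `child.metadata` into the `AttributeReference` it resolves to, a projection that only renames a column keeps the column's annotations. A hedged sketch against the constructors as they appear in this patch (metadata is still a plain `Map[String, Any]` at this point; the `IntegerType` import path and the default curried argument lists are assumptions based on this commit's layout):

    import org.apache.spark.sql.catalyst.expressions.{Alias, AttributeReference}
    import org.apache.spark.sql.catalyst.types.IntegerType

    object AliasMetadataSketch extends App {
      // A resolved attribute that carries metadata.
      val age = AttributeReference(
        "age", IntegerType, false, Map("doc" -> "age (must be nonnegative)"))()

      // Before this change, toAttribute rebuilt the AttributeReference
      // without metadata; now child.metadata rides along.
      val renamed = Alias(age, "years")()
      assert(renamed.metadata == age.metadata)
      assert(renamed.toAttribute.metadata == age.metadata)
    }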
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/basicOperators.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/basicOperators.scala
index 6cd2b456ec7c6..c10e751ee3917 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/basicOperators.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/basicOperators.scala
@@ -163,7 +163,7 @@ case class LowerCaseSchema(child: LogicalPlan) extends UnaryNode {
   protected def lowerCaseSchema(dataType: DataType): DataType = dataType match {
     case StructType(fields) =>
       StructType(fields.map(f =>
-        StructField(f.name.toLowerCase(), lowerCaseSchema(f.dataType), f.nullable, f.metadata)))
+        StructField(f.name.toLowerCase, lowerCaseSchema(f.dataType), f.nullable, f.metadata)))
     case ArrayType(elemType, containsNull) => ArrayType(lowerCaseSchema(elemType), containsNull)
     case otherType => otherType
   }
@@ -173,9 +173,10 @@ case class LowerCaseSchema(child: LogicalPlan) extends UnaryNode {
       AttributeReference(
         a.name.toLowerCase,
         lowerCaseSchema(a.dataType),
-        a.nullable)(
-        a.exprId,
-        a.qualifiers)
+        a.nullable,
+        a.metadata)(
+          a.exprId,
+          a.qualifiers)
     case other => other
   }
 }
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/types/dataTypes.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/types/dataTypes.scala
index 99bd12dfa5e19..05ac9d23b0284 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/types/dataTypes.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/types/dataTypes.scala
@@ -298,7 +298,8 @@ case class ArrayType(elementType: DataType, containsNull: Boolean) extends DataT
  * @param dataType The data type of this field.
  * @param nullable Indicates if values of this field can be `null` values.
  * @param metadata The metadata of this field, which is a map from string to simple type that can be
- *                 serialized to JSON automatically.
+ *                 serialized to JSON automatically. The metadata should be preserved during
+ *                 transformations that do not modify the content of the column, e.g., selection.
  */
 case class StructField(
     name: String,
@@ -330,8 +331,8 @@ case class StructType(fields: Seq[StructField]) extends DataType {
    * have a name matching the given name, an `IllegalArgumentException` will be thrown.
    */
   def apply(name: String): StructField = {
-    nameToField.get(name).getOrElse(
-      throw new IllegalArgumentException(s"Field ${name} does not exist."))
+    nameToField.getOrElse(name,
+      throw new IllegalArgumentException(s"Field $name does not exist."))
   }
 
   /**
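On the schema side, `StructField` gains a `metadata` field (defaulting to an empty `Map`, which is why `ScalaReflection` no longer passes `Map.empty` explicitly), and `StructType.apply(name)` now fails fast on an unknown field. A small hedged sketch of that surface, assuming the `catalyst.types` package layout of this commit:

    import org.apache.spark.sql.catalyst.types.{IntegerType, StringType, StructField, StructType}

    object SchemaMetadataSketch extends App {
      val schema = StructType(Seq(
        StructField("name", StringType, nullable = false),
        StructField("age", IntegerType, nullable = false,
          metadata = Map("doc" -> "age (must be nonnegative)"))))

      // Lookup by name returns the field, metadata included ...
      assert(schema("age").metadata("doc") == "age (must be nonnegative)")

      // ... and an unknown name now throws IllegalArgumentException
      // instead of surfacing an Option.
      assert(scala.util.Try(schema("height")).isFailure)
    }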
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/MetadataSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/MetadataSuite.scala
index 3512998f9d832..f289461d8034a 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/MetadataSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/MetadataSuite.scala
@@ -13,15 +13,22 @@ class MetadataSuite extends FunSuite {
     val members = sqlContext.sparkContext.makeRDD(Seq(
       Person("mike", 10),
       Person("jim", 20)))
-    val table: SchemaRDD = sqlContext.createSchemaRDD(members)
-    val schema: StructType = table.schema
+    val person: SchemaRDD = sqlContext.createSchemaRDD(members)
+    val schema: StructType = person.schema
     println("schema: " + schema)
-    val ageField = schema("age").copy(metadata = Map("desc" -> "age (must be nonnegative)"))
+    val ageField = schema("age").copy(metadata = Map("doc" -> "age (must be nonnegative)"))
     val newSchema = schema.copy(Seq(schema("name"), ageField))
-    val newTable = sqlContext.applySchema(table, newSchema)
+    val newTable = sqlContext.applySchema(person, newSchema)
+    newTable.registerTempTable("person")
     val selectByExprAgeField = newTable.select('age).schema("age")
-    assert(selectByExprAgeField.metadata.nonEmpty)
+    assert(selectByExprAgeField.metadata.contains("doc"))
     val selectByNameAttrAgeField = newTable.select("age".attr).schema("age")
-    assert(selectByNameAttrAgeField.metadata.nonEmpty)
+    assert(selectByNameAttrAgeField.metadata.contains("doc"))
+    val selectAgeBySQL = sql("SELECT age FROM person").schema("age")
+    println(selectAgeBySQL)
+    assert(selectAgeBySQL.metadata.contains("doc"))
+    val selectStarBySQL = sql("SELECT * FROM person").schema("age")
+    println(selectStarBySQL)
+    assert(selectStarBySQL.metadata.contains("doc"))
   }
 }
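The updated suite pins down the end-to-end contract: an annotation attached through `StructField.metadata` survives `select('age)`, `select("age".attr)`, and both `SELECT age` and `SELECT *` through the SQL parser, since none of these modify the column. Outside the test harness, the same round trip looks roughly like the sketch below; the local-mode `SparkContext` setup is an assumption for illustration, not part of the patch:

    import org.apache.spark.{SparkConf, SparkContext}
    import org.apache.spark.sql.SQLContext

    case class Person(name: String, age: Int)

    object MetadataRoundTrip extends App {
      val sc = new SparkContext(new SparkConf().setMaster("local").setAppName("metadata-demo"))
      val sqlContext = new SQLContext(sc)
      import sqlContext._

      val people = createSchemaRDD(sc.makeRDD(Seq(Person("mike", 10), Person("jim", 20))))
      val schema = people.schema

      // Annotate one column and re-apply the otherwise unchanged schema.
      val ageField = schema("age").copy(metadata = Map("doc" -> "age (must be nonnegative)"))
      val annotated = applySchema(people, schema.copy(Seq(schema("name"), ageField)))
      annotated.registerTempTable("person")

      // Selection leaves the column's content untouched, so the annotation survives.
      assert(sql("SELECT age FROM person").schema("age").metadata.contains("doc"))
      assert(sql("SELECT * FROM person").schema("age").metadata.contains("doc"))

      sc.stop()
    }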