From d65072e483da9fd2dbd4999a4976befe2ce054d1 Mon Sep 17 00:00:00 2001
From: Xiangrui Meng
Date: Tue, 7 Oct 2014 14:46:48 -0700
Subject: [PATCH] remove Map.empty

---
 .../spark/sql/catalyst/ScalaReflection.scala  |  4 ++--
 .../sql/catalyst/expressions/Expression.scala |  4 +++-
 .../expressions/namedExpressions.scala        |  7 +++----
 .../plans/logical/basicOperators.scala        |  9 +++++----
 .../spark/sql/catalyst/types/dataTypes.scala  |  7 ++++---
 .../org/apache/spark/sql/MetadataSuite.scala  | 19 +++++++++++++------
 6 files changed, 30 insertions(+), 20 deletions(-)

diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/ScalaReflection.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/ScalaReflection.scala
index 51ba0c3dac321..18e10c95754ff 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/ScalaReflection.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/ScalaReflection.scala
@@ -43,7 +43,7 @@ object ScalaReflection {
   /** Returns a Sequence of attributes for the given case class type. */
   def attributesFor[T: TypeTag]: Seq[Attribute] = schemaFor[T] match {
     case Schema(s: StructType, _) =>
-      s.fields.map(f => AttributeReference(f.name, f.dataType, f.nullable)())
+      s.fields.map(f => AttributeReference(f.name, f.dataType, f.nullable, f.metadata)())
   }
 
   /** Returns a catalyst DataType and its nullability for the given Scala Type using reflection. */
@@ -62,7 +62,7 @@ object ScalaReflection {
         params.head.map { p =>
           val Schema(dataType, nullable) =
             schemaFor(p.typeSignature.substituteTypes(formalTypeArgs, actualTypeArgs))
-          StructField(p.name.toString, dataType, nullable, Map.empty)
+          StructField(p.name.toString, dataType, nullable)
         }), nullable = true)
       // Need to decide if we actually need a special type here.
       case t if t <:< typeOf[Array[Byte]] => Schema(BinaryType, nullable = true)
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Expression.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Expression.scala
index ab0179b14b592..6371582ddc6c9 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Expression.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Expression.scala
@@ -41,9 +41,11 @@ abstract class Expression extends TreeNode[Expression] {
    */
   def foldable: Boolean = false
   def nullable: Boolean
-  def metadata: Map[String, Any] = Map.empty
   def references: AttributeSet = AttributeSet(children.flatMap(_.references.iterator))
 
+  /** Returns the metadata when this expression is a reference to another expression with metadata. */
+  def metadata: Map[String, Any] = Map.empty
+
   /** Returns the result of evaluating this expression on a given input Row */
   def eval(input: Row = null): EvaluatedType
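The hunk above is the root of the metadata chain: every `Expression` now answers an empty `Map` unless it deliberately forwards another expression's output. The following standalone sketch illustrates the intended propagation pattern; `Expr`, `Ref`, and `Renamed` are simplified, hypothetical stand-ins for the Catalyst classes, not the real API:

    // Simplified model of the hook introduced above: the base type defaults
    // to an empty Map, and only nodes that merely forward a child's output
    // override metadata to pass it through unchanged.
    trait Expr {
      def metadata: Map[String, Any] = Map.empty
    }

    // Hypothetical stand-in for a resolved column reference.
    case class Ref(name: String, meta: Map[String, Any]) extends Expr {
      override def metadata: Map[String, Any] = meta
    }

    // Hypothetical stand-in for an alias: renaming leaves the column's
    // content untouched, so the child's metadata survives.
    case class Renamed(child: Expr, name: String) extends Expr {
      override def metadata: Map[String, Any] = child.metadata
    }

    object PropagationSketch extends App {
      val age = Ref("age", Map("doc" -> "age (must be nonnegative)"))
      assert(Renamed(age, "years").metadata("doc") == "age (must be nonnegative)")
    }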
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/namedExpressions.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/namedExpressions.scala
index 5564fa2b09bb6..3167e4ba49a87 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/namedExpressions.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/namedExpressions.scala
@@ -85,11 +85,11 @@ case class Alias(child: Expression, name: String)
   override def dataType = child.dataType
   override def nullable = child.nullable
-
+  override def metadata: Map[String, Any] = child.metadata
   override def toAttribute = {
     if (resolved) {
-      AttributeReference(name, child.dataType, child.nullable)(exprId, qualifiers)
+      AttributeReference(name, child.dataType, child.nullable, child.metadata)(exprId, qualifiers)
     } else {
       UnresolvedAttribute(name)
     }
@@ -98,8 +98,6 @@ case class Alias(child: Expression, name: String)
   override def toString: String = s"$child AS $name#${exprId.id}$typeSuffix"
 
   override protected final def otherCopyArgs = exprId :: qualifiers :: Nil
-
-  override def metadata: Map[String, Any] = child.metadata
 }
 
 /**
@@ -108,6 +106,7 @@ case class Alias(child: Expression, name: String)
  * @param name The name of this attribute, should only be used during analysis or for debugging.
  * @param dataType The [[DataType]] of this attribute.
  * @param nullable True if null is a valid value for this attribute.
+ * @param metadata The metadata of this attribute.
  * @param exprId A globally unique id used to check if different AttributeReferences refer to the
  *               same attribute.
 * @param qualifiers a list of strings that can be used to refer to this attribute in a fully
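With `Alias` overriding `metadata` and threading `child.metadata` into the `AttributeReference` it resolves to, a projection that only renames a column keeps the column's annotations. A hedged sketch against the constructors as they appear in this patch (metadata is still a plain `Map[String, Any]` at this point; the `IntegerType` import path and the default curried argument lists are assumptions based on this commit's layout):

    import org.apache.spark.sql.catalyst.expressions.{Alias, AttributeReference}
    import org.apache.spark.sql.catalyst.types.IntegerType

    object AliasMetadataSketch extends App {
      // A resolved attribute that carries metadata.
      val age = AttributeReference(
        "age", IntegerType, false, Map("doc" -> "age (must be nonnegative)"))()

      // Before this change, toAttribute rebuilt the AttributeReference
      // without metadata; now child.metadata rides along.
      val renamed = Alias(age, "years")()
      assert(renamed.metadata == age.metadata)
      assert(renamed.toAttribute.metadata == age.metadata)
    }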
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/basicOperators.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/basicOperators.scala
index 6cd2b456ec7c6..c10e751ee3917 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/basicOperators.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/basicOperators.scala
@@ -163,7 +163,7 @@ case class LowerCaseSchema(child: LogicalPlan) extends UnaryNode {
   protected def lowerCaseSchema(dataType: DataType): DataType = dataType match {
     case StructType(fields) =>
       StructType(fields.map(f =>
-        StructField(f.name.toLowerCase(), lowerCaseSchema(f.dataType), f.nullable, f.metadata)))
+        StructField(f.name.toLowerCase, lowerCaseSchema(f.dataType), f.nullable, f.metadata)))
     case ArrayType(elemType, containsNull) => ArrayType(lowerCaseSchema(elemType), containsNull)
     case otherType => otherType
   }
@@ -173,9 +173,10 @@ case class LowerCaseSchema(child: LogicalPlan) extends UnaryNode {
       AttributeReference(
         a.name.toLowerCase,
         lowerCaseSchema(a.dataType),
-        a.nullable)(
-        a.exprId,
-        a.qualifiers)
+        a.nullable,
+        a.metadata)(
+          a.exprId,
+          a.qualifiers)
     case other => other
   }
 }
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/types/dataTypes.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/types/dataTypes.scala
index 99bd12dfa5e19..05ac9d23b0284 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/types/dataTypes.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/types/dataTypes.scala
@@ -298,7 +298,8 @@ case class ArrayType(elementType: DataType, containsNull: Boolean) extends DataT
  * @param dataType The data type of this field.
  * @param nullable Indicates if values of this field can be `null` values.
  * @param metadata The metadata of this field, which is a map from string to simple type that can be
- *                 serialized to JSON automatically.
+ *                 serialized to JSON automatically. The metadata should be preserved during
+ *                 transformations that do not modify the content of the column, e.g., selection.
  */
 case class StructField(
     name: String,
@@ -330,8 +331,8 @@ case class StructType(fields: Seq[StructField]) extends DataType {
    * have a name matching the given name, an `IllegalArgumentException` will be thrown.
    */
   def apply(name: String): StructField = {
-    nameToField.get(name).getOrElse(
-      throw new IllegalArgumentException(s"Field ${name} does not exist."))
+    nameToField.getOrElse(name,
+      throw new IllegalArgumentException(s"Field $name does not exist."))
   }
 
   /**
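On the schema side, `StructField` gains a `metadata` field (defaulting to an empty `Map`, which is why `ScalaReflection` no longer passes `Map.empty` explicitly), and `StructType.apply(name)` now fails fast on an unknown field. A small hedged sketch of that surface, assuming the `catalyst.types` package layout of this commit:

    import org.apache.spark.sql.catalyst.types.{IntegerType, StringType, StructField, StructType}

    object SchemaMetadataSketch extends App {
      val schema = StructType(Seq(
        StructField("name", StringType, nullable = false),
        StructField("age", IntegerType, nullable = false,
          metadata = Map("doc" -> "age (must be nonnegative)"))))

      // Lookup by name returns the field, metadata included ...
      assert(schema("age").metadata("doc") == "age (must be nonnegative)")

      // ... and an unknown name now throws IllegalArgumentException
      // instead of surfacing an Option.
      assert(scala.util.Try(schema("height")).isFailure)
    }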
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/MetadataSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/MetadataSuite.scala
index 3512998f9d832..f289461d8034a 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/MetadataSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/MetadataSuite.scala
@@ -13,15 +13,22 @@ class MetadataSuite extends FunSuite {
     val members = sqlContext.sparkContext.makeRDD(Seq(
       Person("mike", 10),
       Person("jim", 20)))
-    val table: SchemaRDD = sqlContext.createSchemaRDD(members)
-    val schema: StructType = table.schema
+    val person: SchemaRDD = sqlContext.createSchemaRDD(members)
+    val schema: StructType = person.schema
     println("schema: " + schema)
-    val ageField = schema("age").copy(metadata = Map("desc" -> "age (must be nonnegative)"))
+    val ageField = schema("age").copy(metadata = Map("doc" -> "age (must be nonnegative)"))
     val newSchema = schema.copy(Seq(schema("name"), ageField))
-    val newTable = sqlContext.applySchema(table, newSchema)
+    val newTable = sqlContext.applySchema(person, newSchema)
+    newTable.registerTempTable("person")
     val selectByExprAgeField = newTable.select('age).schema("age")
-    assert(selectByExprAgeField.metadata.nonEmpty)
+    assert(selectByExprAgeField.metadata.contains("doc"))
     val selectByNameAttrAgeField = newTable.select("age".attr).schema("age")
-    assert(selectByNameAttrAgeField.metadata.nonEmpty)
+    assert(selectByNameAttrAgeField.metadata.contains("doc"))
+    val selectAgeBySQL = sql("SELECT age FROM person").schema("age")
+    println(selectAgeBySQL)
+    assert(selectAgeBySQL.metadata.contains("doc"))
+    val selectStarBySQL = sql("SELECT * FROM person").schema("age")
+    println(selectStarBySQL)
+    assert(selectStarBySQL.metadata.contains("doc"))
   }
 }
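The updated suite pins down the end-to-end contract: an annotation attached through `StructField.metadata` survives `select('age)`, `select("age".attr)`, and both `SELECT age` and `SELECT *` through the SQL parser, since none of these modify the column. Outside the test harness, the same round trip looks roughly like the sketch below; the local-mode `SparkContext` setup is an assumption for illustration, not part of the patch:

    import org.apache.spark.{SparkConf, SparkContext}
    import org.apache.spark.sql.SQLContext

    case class Person(name: String, age: Int)

    object MetadataRoundTrip extends App {
      val sc = new SparkContext(new SparkConf().setMaster("local").setAppName("metadata-demo"))
      val sqlContext = new SQLContext(sc)
      import sqlContext._

      val people = createSchemaRDD(sc.makeRDD(Seq(Person("mike", 10), Person("jim", 20))))
      val schema = people.schema

      // Annotate one column and re-apply the otherwise unchanged schema.
      val ageField = schema("age").copy(metadata = Map("doc" -> "age (must be nonnegative)"))
      val annotated = applySchema(people, schema.copy(Seq(schema("name"), ageField)))
      annotated.registerTempTable("person")

      // Selection leaves the column's content untouched, so the annotation survives.
      assert(sql("SELECT age FROM person").schema("age").metadata.contains("doc"))
      assert(sql("SELECT * FROM person").schema("age").metadata.contains("doc"))

      sc.stop()
    }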