From f9a8c763a8b45e467566a973b7894aaecb9248d4 Mon Sep 17 00:00:00 2001 From: Liang-Chi Hsieh Date: Wed, 17 Aug 2016 16:49:07 +0800 Subject: [PATCH 1/3] Fix LogicalRelation.newInstance. --- .../sql/execution/datasources/LogicalRelation.scala | 11 +++++++++-- .../org/apache/spark/sql/hive/parquetSuites.scala | 7 +++++++ 2 files changed, 16 insertions(+), 2 deletions(-) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/LogicalRelation.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/LogicalRelation.scala index 90711f2b1dde4..c2805e73e6560 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/LogicalRelation.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/LogicalRelation.scala @@ -79,11 +79,18 @@ case class LogicalRelation( /** Used to lookup original attribute capitalization */ val attributeMap: AttributeMap[AttributeReference] = AttributeMap(output.map(o => (o, o))) - def newInstance(): this.type = + /** + * Returns a new instance of this LogicalRelation. According to the semantics of + * MultiInstanceRelation, this method should returns a copy of this object with + * unique expression ids. Thus we don't respect the `expectedOutputAttributes` and + * create new instances of original output attributes. 
+ */ + override def newInstance(): this.type = { LogicalRelation( relation, - expectedOutputAttributes, + Some(output.map(_.newInstance())), metastoreTableIdentifier).asInstanceOf[this.type] + } override def refresh(): Unit = relation match { case fs: HadoopFsRelation => fs.refresh() diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/parquetSuites.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/parquetSuites.scala index 31b6197d56fc7..e92bbdea75a7b 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/parquetSuites.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/parquetSuites.scala @@ -589,6 +589,13 @@ class ParquetMetastoreSuite extends ParquetPartitioningTest { } } } + + test("self-join") { + val table = spark.table("normal_parquet") + val selfJoin = table.as("t1").join(table.as("t2")) + checkAnswer(selfJoin, + sql("SELECT * FROM normal_parquet x JOIN normal_parquet y")) + } } /** From e243323cb04880c20fb40e1aed8b4a28022d5540 Mon Sep 17 00:00:00 2001 From: Liang-Chi Hsieh Date: Fri, 19 Aug 2016 22:25:16 +0800 Subject: [PATCH 2/3] Address comment. 
--- .../spark/sql/execution/datasources/LogicalRelation.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/LogicalRelation.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/LogicalRelation.scala index c2805e73e6560..6223fc20309b9 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/LogicalRelation.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/LogicalRelation.scala @@ -88,7 +88,7 @@ case class LogicalRelation( override def newInstance(): this.type = { LogicalRelation( relation, - Some(output.map(_.newInstance())), + expectedOutputAttributes.map(_.map(_.newInstance())), metastoreTableIdentifier).asInstanceOf[this.type] } From e7fe68b002594a294b199317be3e2d8fc250eb4e Mon Sep 17 00:00:00 2001 From: Liang-Chi Hsieh Date: Sat, 20 Aug 2016 13:28:10 +0800 Subject: [PATCH 3/3] Update the doc. --- .../spark/sql/execution/datasources/LogicalRelation.scala | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/LogicalRelation.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/LogicalRelation.scala index 6223fc20309b9..2a8e147011f55 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/LogicalRelation.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/LogicalRelation.scala @@ -81,9 +81,9 @@ case class LogicalRelation( /** * Returns a new instance of this LogicalRelation. According to the semantics of - * MultiInstanceRelation, this method should returns a copy of this object with - * unique expression ids. Thus we don't respect the `expectedOutputAttributes` and - * create new instances of original output attributes. + * MultiInstanceRelation, this method returns a copy of this object with + * unique expression ids. 
We respect `expectedOutputAttributes`: when it is defined, + * each attribute in it is replaced by a new instance. */ override def newInstance(): this.type = { LogicalRelation(