From 6479eb40c597f28c8bf2a003e7eafed6061f14a8 Mon Sep 17 00:00:00 2001
From: Reynold Xin
Date: Thu, 24 Mar 2016 16:42:08 -0700
Subject: [PATCH 1/2] [SPARK-14142][SQL] Replace internal use of unionAll with union

---
 python/pyspark/sql/dataframe.py                    |  4 ++--
 python/pyspark/sql/tests.py                        |  4 ++--
 .../apache/spark/sql/catalyst/dsl/package.scala    |  2 +-
 .../sql/catalyst/analysis/AnalysisErrorSuite.scala |  2 +-
 .../sql/catalyst/analysis/AnalysisSuite.scala      |  2 +-
 .../sql/catalyst/optimizer/PruneFiltersSuite.scala |  4 ++--
 .../plans/ConstraintPropagationSuite.scala         |  8 ++++----
 .../main/scala/org/apache/spark/sql/Dataset.scala  |  4 ++--
 .../spark/sql/execution/streaming/memory.scala     |  2 +-
 .../org/apache/spark/sql/CachedTableSuite.scala    |  2 +-
 .../org/apache/spark/sql/DataFrameStatSuite.scala  |  2 +-
 .../org/apache/spark/sql/DataFrameSuite.scala      | 12 ++++++------
 .../scala/org/apache/spark/sql/JoinSuite.scala     |  2 +-
 .../scala/org/apache/spark/sql/SQLQuerySuite.scala |  4 ++--
 .../sql/execution/ExchangeCoordinatorSuite.scala   |  2 +-
 .../spark/sql/sources/PartitionedWriteSuite.scala  |  2 +-
 .../apache/spark/sql/sources/SaveLoadSuite.scala   |  2 +-
 .../apache/spark/sql/hive/JavaDataFrameSuite.java  |  2 +-
 .../apache/spark/sql/hive/CachedTableSuite.scala   |  4 ++--
 .../apache/spark/sql/hive/MultiDatabaseSuite.scala | 14 +++++++-------
 .../sql/sources/ParquetHadoopFsRelationSuite.scala |  2 +-
 .../spark/sql/sources/hadoopFsRelationSuites.scala | 12 ++++++------
 22 files changed, 47 insertions(+), 47 deletions(-)

diff --git a/python/pyspark/sql/dataframe.py b/python/pyspark/sql/dataframe.py
index 5cfc348a69caf..7a69c4c70cba3 100644
--- a/python/pyspark/sql/dataframe.py
+++ b/python/pyspark/sql/dataframe.py
@@ -360,7 +360,7 @@ def repartition(self, numPartitions, *cols):
 
         >>> df.repartition(10).rdd.getNumPartitions()
         10
-        >>> data = df.unionAll(df).repartition("age")
+        >>> data = df.union(df).repartition("age")
         >>> data.show()
         +---+-----+
         |age| name|
@@ -919,7 +919,7 @@ def union(self, other):
 
         This is equivalent to `UNION ALL` in SQL. To do a SQL-style set union
         (that does deduplication of elements), use this function followed by a distinct.
""" - return DataFrame(self._jdf.unionAll(other._jdf), self.sql_ctx) + return DataFrame(self._jdf.union(other._jdf), self.sql_ctx) @since(1.3) def unionAll(self, other): diff --git a/python/pyspark/sql/tests.py b/python/pyspark/sql/tests.py index 9722e9e9cae22..83ef76c13cee3 100644 --- a/python/pyspark/sql/tests.py +++ b/python/pyspark/sql/tests.py @@ -599,7 +599,7 @@ def test_parquet_with_udt(self): point = df1.head().point self.assertEqual(point, PythonOnlyPoint(1.0, 2.0)) - def test_unionAll_with_udt(self): + def test_union_with_udt(self): from pyspark.sql.tests import ExamplePoint, ExamplePointUDT row1 = (1.0, ExamplePoint(1.0, 2.0)) row2 = (2.0, ExamplePoint(3.0, 4.0)) @@ -608,7 +608,7 @@ def test_unionAll_with_udt(self): df1 = self.sqlCtx.createDataFrame([row1], schema) df2 = self.sqlCtx.createDataFrame([row2], schema) - result = df1.unionAll(df2).orderBy("label").collect() + result = df1.union(df2).orderBy("label").collect() self.assertEqual( result, [ diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/dsl/package.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/dsl/package.scala index dc5264e2660d8..3540014c3e99c 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/dsl/package.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/dsl/package.scala @@ -280,7 +280,7 @@ package object dsl { def intersect(otherPlan: LogicalPlan): LogicalPlan = Intersect(logicalPlan, otherPlan) - def unionAll(otherPlan: LogicalPlan): LogicalPlan = Union(logicalPlan, otherPlan) + def union(otherPlan: LogicalPlan): LogicalPlan = Union(logicalPlan, otherPlan) def generate( generator: Generator, diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisErrorSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisErrorSuite.scala index c87a2e24bdb48..a90dfc50392d5 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisErrorSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisErrorSuite.scala @@ -250,7 +250,7 @@ class AnalysisErrorSuite extends AnalysisTest { errorTest( "union with unequal number of columns", - testRelation.unionAll(testRelation2), + testRelation.union(testRelation2), "union" :: "number of columns" :: testRelation2.output.length.toString :: testRelation.output.length.toString :: Nil) diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisSuite.scala index 8b568b6dd6acd..9563f43259fbe 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisSuite.scala @@ -32,7 +32,7 @@ class AnalysisSuite extends AnalysisTest { val plan = (1 to 100) .map(_ => testRelation) .fold[LogicalPlan](testRelation) { (a, b) => - a.select(UnresolvedStar(None)).select('a).unionAll(b.select(UnresolvedStar(None))) + a.select(UnresolvedStar(None)).select('a).union(b.select(UnresolvedStar(None))) } assertAnalysisSuccess(plan) diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/PruneFiltersSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/PruneFiltersSuite.scala index 0ee7cf92097e1..14fb72a8a3439 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/PruneFiltersSuite.scala +++ 
@@ -60,8 +60,8 @@ class PruneFiltersSuite extends PlanTest {
 
     val query = tr1.where('a.attr > 10)
-      .unionAll(tr2.where('d.attr > 10)
-      .unionAll(tr3.where('g.attr > 10)))
+      .union(tr2.where('d.attr > 10)
+      .union(tr3.where('g.attr > 10)))
 
     val queryWithUselessFilter = query.where('a.attr > 10)
     val optimized = Optimize.execute(queryWithUselessFilter.analyze)
diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/plans/ConstraintPropagationSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/plans/ConstraintPropagationSuite.scala
index a9375a740daac..f3ab026192f09 100644
--- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/plans/ConstraintPropagationSuite.scala
+++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/plans/ConstraintPropagationSuite.scala
@@ -109,14 +109,14 @@ class ConstraintPropagationSuite extends SparkFunSuite {
 
     assert(tr1
       .where('a.attr > 10)
-      .unionAll(tr2.where('e.attr > 10)
-      .unionAll(tr3.where('i.attr > 10)))
+      .union(tr2.where('e.attr > 10)
+      .union(tr3.where('i.attr > 10)))
       .analyze.constraints.isEmpty)
 
     verifyConstraints(tr1
       .where('a.attr > 10)
-      .unionAll(tr2.where('d.attr > 10)
-      .unionAll(tr3.where('g.attr > 10)))
+      .union(tr2.where('d.attr > 10)
+      .union(tr3.where('g.attr > 10)))
       .analyze.constraints,
       ExpressionSet(Seq(resolveColumn(tr1, "a") > 10,
         IsNotNull(resolveColumn(tr1, "a")))))
 
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/Dataset.scala b/sql/core/src/main/scala/org/apache/spark/sql/Dataset.scala
index ec0b3c78ed72c..b377a3580c6cf 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/Dataset.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/Dataset.scala
@@ -1361,8 +1361,8 @@ class Dataset[T] private[sql](
    * by a [[distinct]].
    *
    * @group typedrel
-   * @since 2.0.0
-   */
+  * @since 2.0.0
+  */
   def union(other: Dataset[T]): Dataset[T] = withTypedPlan {
     // This breaks caching, but it's usually ok because it addresses a very specific use case:
     // using union to union many files or partitions.
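
Note on the documented semantics (not part of the patch): `union` keeps UNION ALL behaviour, and the scaladoc above points to a trailing distinct for SQL-style set union. A minimal sketch, assuming a SQLContext named `sqlContext` as in the test suites touched here:

    // Hypothetical illustration only; `sqlContext` is assumed to already exist.
    val df = sqlContext.range(0, 3).toDF("id")
    val unionAllStyle = df.union(df)             // 6 rows: duplicates kept (UNION ALL)
    val setUnionStyle = df.union(df).distinct()  // 3 rows: duplicates removed (SQL UNION)

Both names build the same Union logical plan (see the dsl change above), so the rename is behaviour-preserving for callers.
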
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/memory.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/memory.scala
index 8bc8bcaa966b1..0f91e59e04ac9 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/memory.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/memory.scala
@@ -97,7 +97,7 @@ case class MemoryStream[A : Encoder](id: Int, sqlContext: SQLContext)
       s"MemoryBatch [$startOrdinal, $endOrdinal]: ${newBlocks.flatMap(_.collect()).mkString(", ")}")
     newBlocks
       .map(_.toDF())
-      .reduceOption(_ unionAll _)
+      .reduceOption(_ union _)
       .getOrElse {
         sys.error("No data selected!")
       }
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/CachedTableSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/CachedTableSuite.scala
index efa2eeaf4d751..82b79c791db40 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/CachedTableSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/CachedTableSuite.scala
@@ -363,7 +363,7 @@ class CachedTableSuite extends QueryTest with SQLTestUtils with SharedSQLContext
   }
 
   test("A cached table preserves the partitioning and ordering of its cached SparkPlan") {
-    val table3x = testData.unionAll(testData).unionAll(testData)
+    val table3x = testData.union(testData).union(testData)
     table3x.registerTempTable("testData3x")
 
     sql("SELECT key, value FROM testData3x ORDER BY key").registerTempTable("orderedTable")
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameStatSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameStatSuite.scala
index fe12aa809909a..0ea7727e45029 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameStatSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameStatSuite.scala
@@ -57,7 +57,7 @@ class DataFrameStatSuite extends QueryTest with SharedSQLContext {
     val splits = data.randomSplit(Array[Double](1, 2, 3), seed)
     assert(splits.length == 3, "wrong number of splits")
 
-    assert(splits.reduce((a, b) => a.unionAll(b)).sort("id").collect().toList ==
+    assert(splits.reduce((a, b) => a.union(b)).sort("id").collect().toList ==
       data.collect().toList, "incomplete or wrong split")
 
     val s = splits.map(_.count())
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameSuite.scala
index ec4e7b2042bc4..86c6405522363 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameSuite.scala
@@ -94,8 +94,8 @@ class DataFrameSuite extends QueryTest with SharedSQLContext {
   }
 
   test("union all") {
-    val unionDF = testData.unionAll(testData).unionAll(testData)
-      .unionAll(testData).unionAll(testData)
+    val unionDF = testData.union(testData).union(testData)
+      .union(testData).union(testData)
 
     // Before optimizer, Union should be combined.
     assert(unionDF.queryExecution.analyzed.collect {
@@ -107,7 +107,7 @@ class DataFrameSuite extends QueryTest with SharedSQLContext {
     )
   }
 
-  test("unionAll should union DataFrames with UDTs (SPARK-13410)") {
+  test("union should union DataFrames with UDTs (SPARK-13410)") {
     val rowRDD1 = sparkContext.parallelize(Seq(Row(1, new ExamplePoint(1.0, 2.0))))
     val schema1 = StructType(Array(StructField("label", IntegerType, false),
                     StructField("point", new ExamplePointUDT(), false)))
@@ -118,7 +118,7 @@ class DataFrameSuite extends QueryTest with SharedSQLContext {
     val df2 = sqlContext.createDataFrame(rowRDD2, schema2)
 
     checkAnswer(
-      df1.unionAll(df2).orderBy("label"),
+      df1.union(df2).orderBy("label"),
       Seq(Row(1, new ExamplePoint(1.0, 2.0)), Row(2, new ExamplePoint(3.0, 4.0)))
     )
   }
@@ -636,7 +636,7 @@ class DataFrameSuite extends QueryTest with SharedSQLContext {
       val jsonDF = sqlContext.read.json(jsonDir)
       assert(parquetDF.inputFiles.nonEmpty)
 
-      val unioned = jsonDF.unionAll(parquetDF).inputFiles.sorted
+      val unioned = jsonDF.union(parquetDF).inputFiles.sorted
       val allFiles = (jsonDF.inputFiles ++ parquetDF.inputFiles).distinct.sorted
       assert(unioned === allFiles)
     }
@@ -1104,7 +1104,7 @@ class DataFrameSuite extends QueryTest with SharedSQLContext {
       }
     }
 
-    val union = df1.unionAll(df2)
+    val union = df1.union(df2)
     checkAnswer(
       union.filter('i < rand(7) * 10),
       expected(union)
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/JoinSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/JoinSuite.scala
index dfffa4bc8b1c0..5af1a4fcd7769 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/JoinSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/JoinSuite.scala
@@ -184,7 +184,7 @@ class JoinSuite extends QueryTest with SharedSQLContext {
   }
 
   test("big inner join, 4 matches per row") {
-    val bigData = testData.unionAll(testData).unionAll(testData).unionAll(testData)
+    val bigData = testData.union(testData).union(testData).union(testData)
     val bigDataX = bigData.as("x")
     val bigDataY = bigData.as("y")
 
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala
index eb486a135f00a..d0030e145420a 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala
@@ -250,8 +250,8 @@ class SQLQuerySuite extends QueryTest with SharedSQLContext {
   test("aggregation with codegen") {
     // Prepare a table that we can group some rows.
sqlContext.table("testData") - .unionAll(sqlContext.table("testData")) - .unionAll(sqlContext.table("testData")) + .union(sqlContext.table("testData")) + .union(sqlContext.table("testData")) .registerTempTable("testData3x") try { diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/ExchangeCoordinatorSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/ExchangeCoordinatorSuite.scala index 4f01e46633c8e..01d485ce2d713 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/ExchangeCoordinatorSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/ExchangeCoordinatorSuite.scala @@ -342,7 +342,7 @@ class ExchangeCoordinatorSuite extends SparkFunSuite with BeforeAndAfterAll { sqlContext .range(0, 1000) .selectExpr("id % 500 as key", "id as value") - .unionAll(sqlContext.range(0, 1000).selectExpr("id % 500 as key", "id as value")) + .union(sqlContext.range(0, 1000).selectExpr("id % 500 as key", "id as value")) checkAnswer( join, expectedAnswer.collect()) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/sources/PartitionedWriteSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/sources/PartitionedWriteSuite.scala index 27b02d6e1ab36..a9b1970a7c393 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/sources/PartitionedWriteSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/sources/PartitionedWriteSuite.scala @@ -44,7 +44,7 @@ class PartitionedWriteSuite extends QueryTest with SharedSQLContext { path.delete() val base = sqlContext.range(100) - val df = base.unionAll(base).select($"id", lit(1).as("data")) + val df = base.union(base).select($"id", lit(1).as("data")) df.write.partitionBy("id").save(path.getCanonicalPath) checkAnswer( diff --git a/sql/core/src/test/scala/org/apache/spark/sql/sources/SaveLoadSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/sources/SaveLoadSuite.scala index 588f6e268f31c..bb2c54aa64977 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/sources/SaveLoadSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/sources/SaveLoadSuite.scala @@ -122,7 +122,7 @@ class SaveLoadSuite extends DataSourceTest with SharedSQLContext with BeforeAndA // verify the append mode df.write.mode(SaveMode.Append).json(path.toString) - val df2 = df.unionAll(df) + val df2 = df.union(df) df2.registerTempTable("jsonTable2") checkLoad(df2, "jsonTable2") diff --git a/sql/hive/src/test/java/org/apache/spark/sql/hive/JavaDataFrameSuite.java b/sql/hive/src/test/java/org/apache/spark/sql/hive/JavaDataFrameSuite.java index 63fb4b7cf726f..397421ae92a47 100644 --- a/sql/hive/src/test/java/org/apache/spark/sql/hive/JavaDataFrameSuite.java +++ b/sql/hive/src/test/java/org/apache/spark/sql/hive/JavaDataFrameSuite.java @@ -82,7 +82,7 @@ public void saveTableAndQueryIt() { @Test public void testUDAF() { - Dataset df = hc.range(0, 100).unionAll(hc.range(0, 100)).select(col("id").as("value")); + Dataset df = hc.range(0, 100).union(hc.range(0, 100)).select(col("id").as("value")); UserDefinedAggregateFunction udaf = new MyDoubleSum(); UserDefinedAggregateFunction registeredUDAF = hc.udf().register("mydoublesum", udaf); // Create Columns for the UDAF. 
diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/CachedTableSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/CachedTableSuite.scala
index 656c1317c1d9f..11384a0275ae3 100644
--- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/CachedTableSuite.scala
+++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/CachedTableSuite.scala
@@ -186,7 +186,7 @@ class CachedTableSuite extends QueryTest with TestHiveSingleton {
     assertCached(table("refreshTable"))
     checkAnswer(
       table("refreshTable"),
-      table("src").unionAll(table("src")).collect())
+      table("src").union(table("src")).collect())
 
     // Drop the table and create it again.
     sql("DROP TABLE refreshTable")
@@ -198,7 +198,7 @@ class CachedTableSuite extends QueryTest with TestHiveSingleton {
     sql("REFRESH TABLE refreshTable")
     checkAnswer(
       table("refreshTable"),
-      table("src").unionAll(table("src")).collect())
+      table("src").union(table("src")).collect())
     // It is not cached.
     assert(!isCached("refreshTable"), "refreshTable should not be cached.")
 
diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/MultiDatabaseSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/MultiDatabaseSuite.scala
index d275190744002..f3af60a0186f2 100644
--- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/MultiDatabaseSuite.scala
+++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/MultiDatabaseSuite.scala
@@ -113,11 +113,11 @@ class MultiDatabaseSuite extends QueryTest with SQLTestUtils with TestHiveSingle
       df.write.mode(SaveMode.Overwrite).saveAsTable("t")
       df.write.mode(SaveMode.Append).saveAsTable("t")
       assert(sqlContext.tableNames().contains("t"))
-      checkAnswer(sqlContext.table("t"), df.unionAll(df))
+      checkAnswer(sqlContext.table("t"), df.union(df))
     }
 
     assert(sqlContext.tableNames(db).contains("t"))
-    checkAnswer(sqlContext.table(s"$db.t"), df.unionAll(df))
+    checkAnswer(sqlContext.table(s"$db.t"), df.union(df))
 
     checkTablePath(db, "t")
   }
@@ -128,7 +128,7 @@ class MultiDatabaseSuite extends QueryTest with SQLTestUtils with TestHiveSingle
     df.write.mode(SaveMode.Overwrite).saveAsTable(s"$db.t")
     df.write.mode(SaveMode.Append).saveAsTable(s"$db.t")
     assert(sqlContext.tableNames(db).contains("t"))
-    checkAnswer(sqlContext.table(s"$db.t"), df.unionAll(df))
+    checkAnswer(sqlContext.table(s"$db.t"), df.union(df))
 
     checkTablePath(db, "t")
   }
@@ -141,7 +141,7 @@ class MultiDatabaseSuite extends QueryTest with SQLTestUtils with TestHiveSingle
         assert(sqlContext.tableNames().contains("t"))
 
         df.write.insertInto(s"$db.t")
-        checkAnswer(sqlContext.table(s"$db.t"), df.unionAll(df))
+        checkAnswer(sqlContext.table(s"$db.t"), df.union(df))
       }
     }
   }
@@ -156,7 +156,7 @@ class MultiDatabaseSuite extends QueryTest with SQLTestUtils with TestHiveSingle
       assert(sqlContext.tableNames(db).contains("t"))
 
       df.write.insertInto(s"$db.t")
-      checkAnswer(sqlContext.table(s"$db.t"), df.unionAll(df))
+      checkAnswer(sqlContext.table(s"$db.t"), df.union(df))
     }
   }
 
@@ -220,7 +220,7 @@ class MultiDatabaseSuite extends QueryTest with SQLTestUtils with TestHiveSingle
         hiveContext.refreshTable("t")
         checkAnswer(
           sqlContext.table("t"),
-          df.withColumn("p", lit(1)).unionAll(df.withColumn("p", lit(2))))
+          df.withColumn("p", lit(1)).union(df.withColumn("p", lit(2))))
       }
     }
   }
@@ -252,7 +252,7 @@ class MultiDatabaseSuite extends QueryTest with SQLTestUtils with TestHiveSingle
         hiveContext.refreshTable(s"$db.t")
         checkAnswer(
           sqlContext.table(s"$db.t"),
-          df.withColumn("p", lit(1)).unionAll(df.withColumn("p", lit(2))))
+          df.withColumn("p", lit(1)).union(df.withColumn("p", lit(2))))
df.withColumn("p", lit(1)).union(df.withColumn("p", lit(2)))) } } } diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/sources/ParquetHadoopFsRelationSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/sources/ParquetHadoopFsRelationSuite.scala index 1e5dbd991edad..a15bd227a9201 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/sources/ParquetHadoopFsRelationSuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/sources/ParquetHadoopFsRelationSuite.scala @@ -137,7 +137,7 @@ class ParquetHadoopFsRelationSuite extends HadoopFsRelationTest { fs.delete(commonSummaryPath, true) df.write.mode(SaveMode.Append).parquet(path) - checkAnswer(sqlContext.read.parquet(path), df.unionAll(df)) + checkAnswer(sqlContext.read.parquet(path), df.union(df)) assert(fs.exists(summaryPath)) assert(fs.exists(commonSummaryPath)) diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/sources/hadoopFsRelationSuites.scala b/sql/hive/src/test/scala/org/apache/spark/sql/sources/hadoopFsRelationSuites.scala index e842caf5bec13..ea7e9057423e0 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/sources/hadoopFsRelationSuites.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/sources/hadoopFsRelationSuites.scala @@ -60,7 +60,7 @@ abstract class HadoopFsRelationTest extends QueryTest with SQLTestUtils with Tes p2 <- Seq("foo", "bar") } yield (i, s"val_$i", 2, p2)).toDF("a", "b", "p1", "p2") - lazy val partitionedTestDF = partitionedTestDF1.unionAll(partitionedTestDF2) + lazy val partitionedTestDF = partitionedTestDF1.union(partitionedTestDF2) def checkQueries(df: DataFrame): Unit = { // Selects everything @@ -191,7 +191,7 @@ abstract class HadoopFsRelationTest extends QueryTest with SQLTestUtils with Tes sqlContext.read.format(dataSourceName) .option("dataSchema", dataSchema.json) .load(file.getCanonicalPath).orderBy("a"), - testDF.unionAll(testDF).orderBy("a").collect()) + testDF.union(testDF).orderBy("a").collect()) } } @@ -268,7 +268,7 @@ abstract class HadoopFsRelationTest extends QueryTest with SQLTestUtils with Tes sqlContext.read.format(dataSourceName) .option("dataSchema", dataSchema.json) .load(file.getCanonicalPath), - partitionedTestDF.unionAll(partitionedTestDF).collect()) + partitionedTestDF.union(partitionedTestDF).collect()) } } @@ -332,7 +332,7 @@ abstract class HadoopFsRelationTest extends QueryTest with SQLTestUtils with Tes testDF.write.format(dataSourceName).mode(SaveMode.Append).saveAsTable("t") withTable("t") { - checkAnswer(sqlContext.table("t"), testDF.unionAll(testDF).orderBy("a").collect()) + checkAnswer(sqlContext.table("t"), testDF.union(testDF).orderBy("a").collect()) } } @@ -415,7 +415,7 @@ abstract class HadoopFsRelationTest extends QueryTest with SQLTestUtils with Tes .saveAsTable("t") withTable("t") { - checkAnswer(sqlContext.table("t"), partitionedTestDF.unionAll(partitionedTestDF).collect()) + checkAnswer(sqlContext.table("t"), partitionedTestDF.union(partitionedTestDF).collect()) } } @@ -625,7 +625,7 @@ abstract class HadoopFsRelationTest extends QueryTest with SQLTestUtils with Tes .format(dataSourceName) .option("dataSchema", df.schema.json) .load(dir.getCanonicalPath), - df.unionAll(df)) + df.union(df)) // This will fail because AlwaysFailOutputCommitter is used when we do append. 
      intercept[Exception] {

From 922ec433777f758ce997d1998d38812dde37ff88 Mon Sep 17 00:00:00 2001
From: Reynold Xin
Date: Thu, 24 Mar 2016 17:24:22 -0700
Subject: [PATCH 2/2] fix style

---
 sql/core/src/main/scala/org/apache/spark/sql/Dataset.scala | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/sql/core/src/main/scala/org/apache/spark/sql/Dataset.scala b/sql/core/src/main/scala/org/apache/spark/sql/Dataset.scala
index b377a3580c6cf..ec0b3c78ed72c 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/Dataset.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/Dataset.scala
@@ -1361,8 +1361,8 @@ class Dataset[T] private[sql](
    * by a [[distinct]].
    *
    * @group typedrel
-  * @since 2.0.0
-  */
+   * @since 2.0.0
+   */
   def union(other: Dataset[T]): Dataset[T] = withTypedPlan {
     // This breaks caching, but it's usually ok because it addresses a very specific use case:
     // using union to union many files or partitions.
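
The "union many files or partitions" use case mentioned in the comment above is the reduce pattern that memory.scala switches to `_ union _`. A minimal sketch of that pattern, assuming an existing SQLContext named `sqlContext` and placeholder Parquet paths:

    // Combine many per-partition DataFrames into one, mirroring MemoryStream's
    // reduceOption(_ union _); the paths below are hypothetical.
    import org.apache.spark.sql.DataFrame

    val parts: Seq[DataFrame] = Seq("part-0", "part-1", "part-2")
      .map(p => sqlContext.read.parquet(s"/tmp/events/$p"))

    val combined: DataFrame = parts
      .reduceOption(_ union _)
      .getOrElse(sys.error("No data selected!"))

Each union only widens the logical Union plan; as the comment notes, caching of the intermediate Datasets is not preserved, which is acceptable for this file-combining use case.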